We read each 10x feature-barcode matrix and create one Seurat object per sample.

# Sample 1 (Tomato directory): build the Seurat object straight from the 10x
# matrix. min.cells / min.features drop features detected in fewer than 3 cells
# and cells with fewer than 200 detected features.
raw_dir <- "../raw_data/Tomato/filtered_feature_bc_matrix"
Embryo1 <- CreateSeuratObject(
  counts = Read10X(data.dir = raw_dir),
  min.cells = 3,
  min.features = 200
)
Embryo1@meta.data[["Condition"]] <- "N123LOF"
# Barcodes retained for sample 1.
cells_OK <- colnames(Embryo1)

# Sample 2 (YFP directory): same construction, labelled as the N123WT condition.
raw_dir <- "../raw_data/YFP/filtered_feature_bc_matrix"
Embryo2 <- CreateSeuratObject(
  counts = Read10X(data.dir = raw_dir),
  min.cells = 3,
  min.features = 200
)
Embryo2@meta.data[["Condition"]] <- "N123WT"

0.1 Merging Datasets

# Combine both samples into a single Seurat object.
# The barcodes overlap between the two samples, so merge() renames the cells to
# keep them unique (see the warning below).

Embryo <- merge(x = Embryo1, y = Embryo2, project = "Embryo_N123LOF_Mosaic")
## Warning: Some cell names are duplicated across objects provided. Renaming to
## enforce unique cell names.
# Number of cells per condition after merging.
table(Embryo@meta.data[["Condition"]])
## 
## N123LOF  N123WT 
##   10709   10748
# Free memory: the per-sample objects are no longer needed once they have been
# merged into `Embryo`. (The previous call named objects that do not exist in
# this script, so nothing was removed and R only emitted "object not found"
# warnings.)
rm(Embryo1, Embryo2)
# saveRDS(Embryo, "../rds/EmbryoN123.raw.rds")
# Embryo <- readRDS("../rds/EmbryoN123.raw.rds")

0.2 Starting QC Analysis

# Percentage of each cell's counts that come from genes whose symbol starts
# with "mt-" (the mitochondrial genes listed below).
Embryo[["percent.mt"]] <- PercentageFeatureSet(Embryo, pattern = "^mt-")

# List the genes matched by the mitochondrial pattern as a sanity check.
mito_genes <- grep("^mt-", rownames(Embryo), value = TRUE)

mito_genes
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
# QC scatter plots with dashed guides at the filtering cut-offs applied when
# the cells are subset further down: 500-7500 detected genes, 1000-55000
# transcripts and 10% mitochondrial counts. The mitochondrial guide was drawn
# at 15, which did not match the percent.mt < 10 filter.
plot1 <- FeatureScatter(Embryo, feature1 = "nFeature_RNA",
                        group.by = "orig.ident", feature2 = "percent.mt") +
  geom_vline(xintercept = c(500, 7500), linetype = 2) +
  geom_hline(yintercept = 10, linetype = 2)
plot2 <- FeatureScatter(Embryo, feature1 = "nCount_RNA",
                        group.by = "orig.ident", feature2 = "nFeature_RNA") +
  geom_hline(yintercept = c(500, 7500), linetype = 2) +
  geom_vline(xintercept = c(1000, 55000), linetype = 2)

# cairo_pdf("../Plots/QC/Scatterplot_features.pdf", height = 6, width = 8)
# Stack the two scatter plots vertically.
plot1 / plot2

# dev.off()

# Ribosomal protein genes (symbols starting with "Rps" or "Rpl"); the
# percentage is stored in the `percent_ribo` metadata column.

Embryo <- PercentageFeatureSet(
  object = Embryo,
  pattern = "^Rp[sl]",
  col.name = "percent_ribo"
)

# Hemoglobin genes: "Hb" followed by any character other than "p", "(" or ")"
# (the negated character class excludes those three characters); the
# percentage is stored in `percent_hb`.

Embryo <- PercentageFeatureSet(
  object = Embryo,
  pattern = "^Hb[^(p)]",
  col.name = "percent_hb"
)

# Violin plots of every QC metric, grouped by the original sample identity.

feats <- c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent_ribo", "percent_hb")
# cairo_pdf("../Plots/QC/VlnPlot_features.pdf", height = 6, width = 8)
VlnPlot(
  object = Embryo,
  features = feats,
  group.by = "orig.ident",
  pt.size = 0.1,
  ncol = 3
) +
  NoLegend()
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

# dev.off()

0.3 Features to cut out

Cells with fewer than 500 detected genes (low quality) or more than 7500 (possible doublets) were removed. Cells with fewer than 1000 or more than 55000 transcripts were also removed.

Cells with more than 10% mitochondrial transcripts, more than 35% ribosomal transcripts, or more than 1% hemoglobin transcripts were removed.

# Keep only the cells that pass every QC threshold listed above.

Embryo <- subset(
  Embryo,
  subset =
    nFeature_RNA > 500 & nFeature_RNA < 7500 &
    nCount_RNA > 1000 & nCount_RNA < 55000 &
    percent.mt < 10 &
    percent_ribo < 35 &
    percent_hb < 1
)

0.4 Normalizing the data, selecting the 2000 most highly variable genes (HVGs), and scaling

# Log-normalise the counts of every cell with a scale factor of 10000.

Embryo <- NormalizeData(
  Embryo,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)
## Normalizing layer: counts.1
## Normalizing layer: counts.2
# Feature selection: flag the 2000 most variable genes with the "vst" method.

Embryo <- FindVariableFeatures(
  Embryo,
  selection.method = "vst",
  nfeatures = 2000
)
## Finding variable features for layer counts.1
## Finding variable features for layer counts.2
# Show the first ten variable features.
top10 <- head(VariableFeatures(Embryo), n = 10)
top10
##  [1] "Actc1" "Mylpf" "Ttn"   "Myl1"  "Tnnc1" "Acta2" "Myog"  "Tnni1" "Myl4" 
## [10] "Nefm"
# Count how many variable features remain after excluding hemoglobin,
# mitochondrial and ribosomal genes. A result equal to the number of variable
# features means none of them belongs to those gene families.
vars <- VariableFeatures(Embryo)

vars <- as.data.frame(vars)

# The patterns have to be matched against the gene-name column (`vars`).
# Matching them against the constant 1, as was done before, never hits, so no
# row was ever filtered out.
vars %>%
  filter(!grepl("^Hb[^(p)]", .data$vars)) %>%
  filter(!grepl("^mt-", .data$vars)) %>%
  filter(!grepl("^Rp[sl]", .data$vars)) %>%
  nrow()
# NOTE(review): the previously recorded output ("[1] 2000") was produced while
# the patterns were matched against the constant 1, so it does not show that the
# variable features are free of mitochondrial, ribosomal or hemoglobin genes.
# Re-run this chunk and update the conclusion accordingly.

# Variable-feature plot with the genes stored in `top10` labelled.
plot1 <- VariableFeaturePlot(object = Embryo)
plot2 <- LabelPoints(
  plot = plot1,
  points = top10,
  repel = TRUE
)
## When using repel, set xnudge and ynudge to 0 for optimal results
# cairo_pdf("../Plots/QC/Variable_features.pdf", height = 6, width = 8)
plot2
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 305 rows containing missing values or values outside the scale range
## (`geom_point()`).

# dev.off()

# Scale the data for every gene in the object, not only the variable features.

all.genes <- rownames(Embryo)
Embryo <- ScaleData(object = Embryo, features = all.genes)
## Centering and scaling data matrix

0.5 Perform Linear Dimensional Reduction

## PCA restricted to the variable features

Embryo <- RunPCA(
  object = Embryo,
  features = VariableFeatures(object = Embryo)
)
## PC_ 1 
## Positive:  Rtn1, Tubb3, Stmn3, Elavl3, Dcx, Map2, Ina, Crmp1, Myt1l, Tagln3 
##     Rims2, Akap6, Scg5, Stmn2, Soga3, Kif5c, Nsg2, Elavl2, Rab3c, Cadps 
##     Elavl4, Rufy3, Scg3, Mapt, Mllt11, Pcsk1n, Tubb2b, Celf4, Igfbpl1, Dpysl5 
## Negative:  Lgals1, Gpc3, Sparc, Col1a2, Col3a1, Fbn2, Tpm1, Col1a1, Hmga2, Fn1 
##     Fos, Anxa2, Mest, Twist1, Prrx1, Jun, Col5a2, Peg3, Ldha, Dlc1 
##     Hmgb2, Id1, Fbxl7, Hmcn1, Cyba, Cped1, Itm2a, Ptn, S100a10, Dab2 
## PC_ 2 
## Positive:  Gmfg, Lcp1, Smagp, Icam2, Esam, Nrros, Ptpn18, Fxyd5, Gimap6, Inpp5d 
##     Cldn5, S100a16, Gimap1, Cd93, Cdh5, Plvap, Tgfb1, Pecam1, Rasgrp3, Myct1 
##     Samsn1, Kdr, Gatm, Flt1, Gngt2, Stab1, Laptm5, Plxnd1, Lmo2, Tagln2 
## Negative:  Gpc3, Col1a2, Col3a1, Fbn2, Aff3, Rbms3, Col1a1, Gpc6, Ptprd, Ptn 
##     Adgrl3, Bnc2, Zfhx4, Nfib, Fbxl7, Tenm3, Pbx1, Nfia, Peg3, Ror1 
##     Col5a2, Fn1, Robo1, Rora, Efna5, Prrx1, Vcan, Sema3a, Aff2, Slit3 
## PC_ 3 
## Positive:  Actc1, Ttn, Myog, Tnnt2, Tnni1, Myl1, Klhl41, Neb, Trim55, Mymk 
##     Des, Mylpf, Tnnc1, Atp2a1, Tnnt1, Myh3, Myod1, Cryab, Mylk4, Myl4 
##     Chrna1, Synpo2l, Ablim3, Il17b, Hspb2, Smyd1, Unc45b, Actn2, Apobec2, Pgam2 
## Negative:  Hmgb2, Top2a, H2az1, Cdca8, Nusap1, Mki67, Kif11, Pclaf, Prc1, H2ax 
##     Birc5, Cenpf, Spc25, Aurkb, Tpx2, H1f5, Smc4, Cks2, Ube2c, H3c3 
##     Ckap2l, Cenpe, Cdk1, H2ac24, H2ac8, Cdca3, Incenp, H4c4, Mis18bp1, H2ac4 
## PC_ 4 
## Positive:  Top2a, Nusap1, Cdca8, Prc1, Cenpf, Kif11, Spc25, H3c3, Hmgb2, Ube2c 
##     Mki67, Tpx2, H1f5, H2ac4, Pclaf, H2ac8, Cks2, Cenpe, Aurkb, Knl1 
##     Ckap2l, Smc4, Mis18bp1, H2ac24, H2az1, Birc5, Hmmr, H2bc18, Cdca3, Fbxo5 
## Negative:  Cdh5, Kdr, Icam2, Esam, Cldn5, Gimap6, Pecam1, Flt1, Cd93, Emcn 
##     Plvap, Ecscr, Rasgrp3, Myct1, Plxnd1, Gmfg, Smagp, Gimap1, Egfl7, Gngt2 
##     Lcp1, Eng, Adgrf5, Fxyd5, Tie1, Cd34, Rasip1, Ctla2a, Adgrl4, Ramp2 
## PC_ 5 
## Positive:  Cdh5, Esam, Cldn5, Plvap, Kdr, Emcn, Flt1, Ecscr, Icam2, Pecam1 
##     Myct1, Cd93, S100a16, Tie1, Adgrf5, Rasip1, Ptprb, Gimap6, Gimap4, Klhl4 
##     Eng, Plxnd1, Fam167b, Sox17, Apold1, Aplnr, Adgrl4, Ramp2, Sox18, Cyyr1 
## Negative:  Rac2, Tyrobp, Spi1, Fcer1g, Lst1, Ptprc, Psmb8, Ptpn6, Cd53, Coro1a 
##     Fcgr3, Laptm5, Ms4a6c, Cd52, Dock2, Arhgap30, Plek, Ly86, Csf1r, Fyb 
##     Gpr65, Cyth4, Cd86, Ctsc, Ctss, Ncf2, Bin2, C1qb, Aif1, Evi2a
# Inspect the PCA: gene loadings of PC1-2 and the cell embedding per sample.

# print(Embryo[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(object = Embryo, dims = 1:2, reduction = "pca")

DimPlot(object = Embryo, reduction = "pca", group.by = "orig.ident")

# Choose the dimensionality of the dataset.
# The JackStraw procedure is left disabled; the elbow plot and the PC heatmaps
# below are used instead.

# Embryo <- JackStraw(Embryo, num.replicate = 100, dims = 30)
# Embryo <- ScoreJackStraw(Embryo, dims = 1:30)
# JackStrawPlot(Embryo, dims = 1:30)

# cairo_pdf("../Plots/QC/ElbowPlot.pdf", height = 6, width = 6)
ElbowPlot(object = Embryo, ndims = 30)

# dev.off()
DimHeatmap(object = Embryo, dims = 1:9, cells = 500, balanced = TRUE)

Based on the elbow plot, the first 20 principal components (PCs) look like a reasonable choice for the downstream analysis.

# Cluster the cells on the first 20 PCs.

# (A stray trailing comma, i.e. an empty argument, was removed from this call.)
Embryo <- FindNeighbors(Embryo, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
# Cluster at three resolutions; the 0.35 result is the one plotted below via
# the "RNA_snn_res.0.35" metadata column.
Embryo <- FindClusters(Embryo, resolution = 0.1, verbose = FALSE)
Embryo <- FindClusters(Embryo, resolution = 0.2, verbose = FALSE)
Embryo <- FindClusters(Embryo, resolution = 0.35, verbose = FALSE)


# Run UMAP on the same 20 PCs

Embryo <- RunUMAP(Embryo, dims = 1:20, verbose = FALSE)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
# UMAP coloured by the resolution-0.35 clusters: all cells together first,
# then one panel per condition.
# cairo_pdf("../Plots/QC/UMAP_res0.35.pdf", height = 6, width = 6)
DimPlot(object = Embryo, group.by = "RNA_snn_res.0.35")

# dev.off()

# cairo_pdf("../Plots/QC/UMAP_res0.35.split.pdf", height = 6, width = 6)
DimPlot(
  object = Embryo,
  group.by = "RNA_snn_res.0.35",
  split.by = "Condition"
)

# dev.off()

# Make the condition the active identity and count the cells per condition
# that are left after QC filtering.
Idents(Embryo) <- "Condition"

table(Idents(Embryo))
## 
## N123LOF  N123WT 
##   10053    9622
 # saveRDS(Embryo, "../rds/EmbryoN123.withDoublets.rds")

0.6 Run DoubletFinder

# Embryo <- readRDS("../rds/EmbryoN123.withDoublets.rds")

Because the samples were not multiplexed (no HTOs), doublets cannot be identified from hashtag information, so we run DoubletFinder to detect them.

# DoubletFinder is run on each sample separately: simulating artificial
# doublets from cells of two different samples would produce doublets that
# cannot occur in the real data (assumes the two samples were captured
# separately - TODO confirm).
# NOTE(review): an earlier note here advised against using DoubletFinder until
# cluster identities are clear; confirm that the clustering is adequate.

# Both subsets are selected explicitly by condition. The WT subset was obtained
# by inverting the LOF selection (`invert = T`), which hid the intent and
# relied on an argument forwarded through `...`.
EmbryoLOF <- subset(Embryo, subset = Condition == "N123LOF")
EmbryoWt <- subset(Embryo, subset = Condition == "N123WT")


# Recompute the 2000 variable features on the LOF cells only and list the
# first ten.
EmbryoLOF <- FindVariableFeatures(
  EmbryoLOF,
  selection.method = "vst",
  nfeatures = 2000
)
## Finding variable features for layer counts.1
top10 <- head(VariableFeatures(EmbryoLOF), n = 10)
top10
##  [1] "Actc1" "Mylpf" "Myl1"  "Tnnc1" "Ttn"   "Myl4"  "Tnnc2" "Acta2" "Tnni1"
## [10] "Acta1"
# Re-scale every gene on the LOF subset.
all.genes <- rownames(EmbryoLOF)
EmbryoLOF <- ScaleData(object = EmbryoLOF, features = all.genes)
## Centering and scaling data matrix
## Warning: Different features in new layer data than already exists for
## scale.data
## PCA on the variable features of the LOF subset

EmbryoLOF <- RunPCA(
  object = EmbryoLOF,
  features = VariableFeatures(object = EmbryoLOF)
)
## PC_ 1 
## Positive:  Esam, Icam2, Cldn5, Cdh5, S100a16, Plvap, Cd93, Kdr, Flt1, Pecam1 
##     Myct1, Gimap6, Gimap1, Rasgrp3, Ecscr, Gimap4, Fam167b, Tie1, Emcn, Plxnd1 
##     Smagp, Egfl7, Gmfg, Rasip1, Adgrl4, Ctla2a, Fxyd5, Gngt2, Eng, Ptprb 
## Negative:  Ptprd, Col1a2, Adgrl3, Aff3, Gpc6, Ptn, Tenm3, Col3a1, Rbms3, Pbx1 
##     Fbn2, Zfhx4, Col1a1, Nfia, Robo1, Ank3, Nnat, Nfib, Bnc2, Mmp16 
##     Efna5, Ncam1, Pcdh9, Rora, Magi2, Ror1, Peg3, Cacna1c, Tenm4, Enox1 
## PC_ 2 
## Positive:  Actc1, Ttn, Arpp21, Myl1, Tnni1, Tnnc1, Ablim3, Klhl41, Mylpf, Tnnt2 
##     Myog, Myh3, Trim55, Myl4, Neb, Cryab, Actn2, Mymk, Il17b, Cdkn1a 
##     Des, Hspb2, Apobec2, Synpo2l, Unc45b, Tnnt1, Atp2a1, Rbm24, Lmod3, Acta1 
## Negative:  Hmgb2, Top2a, Birc5, Pclaf, H2ax, H2az1, Mki67, Spc24, Cdca8, Kif11 
##     Cks2, Nusap1, Tpx2, Smc4, Cdk1, Spc25, Cenpf, Cenpe, Aurkb, Ube2c 
##     Prc1, H3c3, Ckap2l, H1f5, Kif15, Ccna2, Mis18bp1, Cdca3, Kif23, Hmmr 
## PC_ 3 
## Positive:  Actc1, Top2a, Ttn, Mylpf, Nusap1, Klhl41, Myog, Myl1, Kif11, Cdca8 
##     Ube2c, Mki67, Spc24, Tnnc1, Prc1, Myh3, Tnni1, Cks2, Cenpf, Spc25 
##     Tnnt2, Pclaf, Hmgb2, Trim55, Tpx2, Birc5, Neb, Aurkb, Mymk, Hmmr 
## Negative:  Cdh5, Kdr, Cldn5, Flt1, Pecam1, Esam, Icam2, Emcn, Cd93, Plvap 
##     Gimap6, Myct1, Rasip1, Plxnd1, Tie1, Rasgrp3, Adgrf5, Gimap1, Ecscr, Egfl7 
##     Adgrl4, Eng, Gimap4, Cd34, Fam167b, Ctla2a, Gngt2, Ptprb, Fxyd5, Klhl4 
## PC_ 4 
## Positive:  Lgals1, Sparc, Cdkn1c, Col1a2, Tpm1, Col3a1, Col1a1, Itm2a, Nfib, Fbn2 
##     Ogn, Fos, Jun, Ptn, Peg3, Dlk1, Col5a2, Anxa2, Fbxl7, Tgfb2 
##     Mest, Nfia, Igf1, Igfbp5, Palld, Fn1, Tpm2, Svil, Dcn, Rps2 
## Negative:  Ina, Stmn3, Elavl3, Rtn1, Tubb3, Pcsk1n, Mapt, Soga3, Scg5, Rims2 
##     Scg3, Myt1l, Crmp1, Rab3c, Tagln3, Elavl4, Ptprn2, Cadps, Dcx, Actl6b 
##     Snap25, Srrm4, Stmn2, Elavl2, Ttc9b, Dpysl5, Map2, L1cam, Gpm6a, Nsg2 
## PC_ 5 
## Positive:  Fbxl7, Rbms3, Col3a1, Ptprd, Zeb1, Nfia, Col1a1, Nfib, Col1a2, Col5a2 
##     Atrnl1, Sdk1, Prkg1, Akap12, Fbn2, Fn1, Dlc1, Ror1, Pard3b, Sparc 
##     Diaph3, Robo1, Mcc, Mmp16, Klf12, Colec12, Lama4, Pdzrn3, Gpc6, Tshz2 
## Negative:  Rps2, Krt14, Krt5, Fermt1, Sfn, Krt15, Perp, Wnt7b, Tfap2b, Wnt3 
##     Epcam, Wnt6, Pdgfa, Gjb2, Trp63, Mt1, Kremen2, Grhl2, Lmo1, Lgals7 
##     Mt2, Bcam, Dsp, Ppp1r14b, Fxyd3, Wnt4, Cxcl14, Chchd10, Msx1, Hoxd13
ElbowPlot(EmbryoLOF, ndims = 30)

# The first 20 PCs are used below, as for the merged object.
# NOTE(review): the comment that used to be here referred to another dataset
# ("Liver3_2Jul21", 16 dimensions) and did not match this code.

# Cluster the cells

EmbryoLOF <- FindNeighbors(EmbryoLOF, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
EmbryoLOF <- FindClusters(EmbryoLOF, resolution = 0.1, verbose = FALSE)
EmbryoLOF <- FindClusters(EmbryoLOF, resolution = 0.2, verbose = FALSE)
EmbryoLOF <- FindClusters(EmbryoLOF, resolution = 0.35, verbose = FALSE)

# Run UMAP on the first 20 PCs of the LOF subset, then plot the
# resolution-0.35 clusters.

EmbryoLOF <- RunUMAP(EmbryoLOF, dims = 1:20, verbose = FALSE)
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
DimPlot(EmbryoLOF, group.by = "RNA_snn_res.0.35")

# DoubletFinder works in four steps:
#   (1) generate artificial doublets from the existing scRNA-seq data;
#   (2) pre-process the merged real + artificial data;
#   (3) run PCA and use the PC distance matrix to find each cell's proportion
#       of artificial k nearest neighbours (pANN);
#   (4) rank-order and threshold the pANN values according to the expected
#       number of doublets.

# pK identification (no ground truth): sweep over the pN / pK combinations
# reported in the log below.
sweep.res.list <- paramSweep(
  EmbryoLOF,
  PCs = 1:20,
  sct = FALSE
)
## Loading required package: fields
## Loading required package: spam
## Spam version 2.10-0 (2023-10-23) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attaching package: 'spam'
## The following object is masked from 'package:stats4':
## 
##     mle
## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve
## Loading required package: viridisLite
## 
## Try help(fields) to get started.
## Loading required package: parallel
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Gm15433, Lgsn, Glrp1, Gm39653, Pdcd1,
## Gm3646, Slamf7, Ankar, Slamf8, Ifi209, A630095N17Rik, Marco, Spata3, Olfr1256,
## Olfr32, Mylk2, Fcnb, Rbbp8nl, 4930404H24Rik, Birc7, Bpifb2, Bpifb1, Actl10,
## Lime1, Ptf1a, Duoxa2, Wfdc15b, Zswim2, Ndor1, Fut7, A26c3, Erv3, Pck1, Olfr259,
## Olfr73, Cstl1, Arhgap40, Gm15097, Gm14569, Ezhip, Tcp11x2, 4930513O06Rik,
## Gm10490, Tex13b, Gm29133, 4930558C23Rik, Rpe65, Etv3l, Ttc24, Hsd3b5, Pmp2,
## Slc10a5, Slc7a12, Aadacl2, Sucnr1, Ghsr, Clca4b, Chia1, A630076J17Rik, Lce1e,
## Lce3c, Gm17662, Ldc1, Orm2, Uts2, Hes2, Il22ra1, Srarp, Ccl21d, Sit1, Olfr71,
## Toporsl, Dspp, Pdx1, Pdcl2, Gm8879, Aym1, Tmprss11c, Piwil1, 4930519G04Rik,
## Gm21663, Odaph, Gm1979, Htr5a, Oas1e, Pilrb1, Olfr718-ps1, Il23r, Prss2,
## Slco1a4, Clec4e, Vmn2r24, Vmn2r27, Vmn1r46, V1ra8, Gimap7, Doxl1, Reg3b,
## Klrb1b, Clec2i, Olr1, Klrk1, Klri1, Eif4a3l1, Atoh1, Nat8, Igbp1b, Stra8,
## Olfr460, Gm7298, Trim30b, A26c2, Gm52993, Ceacam18, Cnga4, Cyp2a5, Vmn2r66,
## Vmn2r76, 4930451I11Rik, Gvin1, Olfr472, Nanos2, Nccrp1, Pira2, Pira12,
## Ceacam16, Lilra5, Gfy, Pth2, Irgc1, Vmn2r28, Gm20715, Vmn2r49, Vmn1r84, Cdcp3,
## Ffar3, Acsm1, Mrgpra9, Mrgpra2a, Scgb1b30, Odf3, Olfr558, Olfr560, Olfr571,
## Sult3a1, Rfpl4b, Gm49339, Gm5426, Hsd17b6, Glipr1l1, Taf7l2, Neurog3, Olfr1356,
## Elane, A230072I06Rik, Mgat4d, Spata4, Defb39, Defa21, Hapln4, Ido1, Olfr374,
## Ces2f, Nxnl1, Gm7697, Cd209a, Gm4491, Acod1, Prss52, Slc15a1, Olfr728, Galnt15,
## Olfr1512, 1700024G13Rik, Olfr49, Rgr, Gm8104, Gm8267, Mcpt8, Ctsg, Gzmc, Gzmb,
## Gm8050, Gm3298, Gm8108, Pate13, Cyp19a1, Olfr921, Olfr986, Bcl2a1a, Bsx,
## Cyp1a1, Olfr873, Ccdc153, Ccr8, Mobp, Ccr3, Gm10721, Olfr323, Olfr322, Krt26,
## Krt27, Krt40, Krt33a, Krt35, St6galnac2.1, Fam71b, Gm5431, 9930111J21Rik1,
## Slc13a2, Sebox, Fam187a, E030025P04Rik, 9930022D16Rik, Olfr463, 1700012B07Rik,
## Elobl, 4930544D05Rik, Gpr142, Cd300lb, Cd300c, Cd300ld3, H3f4, Vmn1r218,
## Slc17a2, Prl8a2, Serpinb1c, Akr1c21, Gzma, D13Ertd608e, Gm21188, BC147527,
## S100z, Fam240b, Lrrc74a, Serpina12, Serpina3b, Serpina3m, Gm49366, Mroh2b,
## Glycam1, Gsdmc, Slurp1, Gml, Ly6f, Dcstamp, Aqp6, Foxh1, Krt6a, Krt73, Sstr3,
## Krtap7-1, Tex55, Gpr15, Gm21833, Olfr15, Septin12, Rtp1, Rtp2, Tmem207, Stfa2,
## Muc20, A530064D06Rik, Prss33, Prss30, H2-Ob, Btnl2, Tnfsf14, H2-Q1, Prss34,
## Tpsg1, Ccr6, Nhlrc4, Olfr111, Olfr131, Esp8, Vmn2r93, Vmn2r100, Slc22a7, Tff1,
## Cyp4f40, Olfr55, Olfr63, Olfr93, Glyatl3, Kdm5d, Dsg1b, Spink7, Pcdha6, Pcdha7,
## Pcdhac1, Cplx4, Gpr151, Spink6, Gm50364, 1700019N19Rik, Slc22a12, Opalin,
## Slc22a19, Pga5, Ms4a15, Ms4a4c, Gm8369, Cyp17a1, ENSMUSG00000079800.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Gm15433, Lgsn, Glrp1, Gm39653, Pdcd1,
## Gm3646, Slamf7, Ankar, Slamf8, Ifi209, A630095N17Rik, Marco, Spata3, Olfr1256,
## Olfr32, Mylk2, Fcnb, Rbbp8nl, 4930404H24Rik, Birc7, Bpifb2, Bpifb1, Actl10,
## Lime1, Ptf1a, Duoxa2, Wfdc15b, Zswim2, Ndor1, Fut7, A26c3, Erv3, Pck1, Olfr259,
## Olfr73, Cstl1, Arhgap40, Gm15097, Gm14569, Ezhip, Tcp11x2, 4930513O06Rik,
## Gm10490, Tex13b, Gm29133, 4930558C23Rik, Rpe65, Etv3l, Ttc24, Hsd3b5, Pmp2,
## Slc10a5, Slc7a12, Aadacl2, Sucnr1, Ghsr, Clca4b, Chia1, A630076J17Rik, Lce1e,
## Lce3c, Gm17662, Ldc1, Orm2, Uts2, Hes2, Il22ra1, Srarp, Ccl21d, Sit1, Olfr71,
## Toporsl, Dspp, Pdx1, Pdcl2, Gm8879, Aym1, Tmprss11c, Piwil1, 4930519G04Rik,
## Gm21663, Odaph, Gm1979, Htr5a, Oas1e, Pilrb1, Olfr718-ps1, Il23r, Prss2,
## Slco1a4, Clec4e, Vmn2r24, Vmn2r27, Vmn1r46, V1ra8, Gimap7, Doxl1, Reg3b,
## Klrb1b, Clec2i, Olr1, Klrk1, Klri1, Eif4a3l1, Atoh1, Nat8, Igbp1b, Stra8,
## Olfr460, Gm7298, Trim30b, A26c2, Gm52993, Ceacam18, Cnga4, Cyp2a5, Vmn2r66,
## Vmn2r76, 4930451I11Rik, Gvin1, Olfr472, Nanos2, Nccrp1, Pira2, Pira12,
## Ceacam16, Lilra5, Gfy, Pth2, Irgc1, Vmn2r28, Gm20715, Vmn2r49, Vmn1r84, Cdcp3,
## Ffar3, Acsm1, Mrgpra9, Mrgpra2a, Scgb1b30, Odf3, Olfr558, Olfr560, Olfr571,
## Sult3a1, Rfpl4b, Gm49339, Gm5426, Hsd17b6, Glipr1l1, Taf7l2, Neurog3, Olfr1356,
## Elane, A230072I06Rik, Mgat4d, Spata4, Defb39, Defa21, Hapln4, Ido1, Olfr374,
## Ces2f, Nxnl1, Gm7697, Cd209a, Gm4491, Acod1, Prss52, Slc15a1, Olfr728, Galnt15,
## Olfr1512, 1700024G13Rik, Olfr49, Rgr, Gm8104, Gm8267, Mcpt8, Ctsg, Gzmc, Gzmb,
## Gm8050, Gm3298, Gm8108, Pate13, Cyp19a1, Olfr921, Olfr986, Bcl2a1a, Bsx,
## Cyp1a1, Olfr873, Ccdc153, Ccr8, Mobp, Ccr3, Gm10721, Olfr323, Olfr322, Krt26,
## Krt27, Krt40, Krt33a, Krt35, St6galnac2.1, Fam71b, Gm5431, 9930111J21Rik1,
## Slc13a2, Sebox, Fam187a, E030025P04Rik, 9930022D16Rik, Olfr463, 1700012B07Rik,
## Elobl, 4930544D05Rik, Gpr142, Cd300lb, Cd300c, Cd300ld3, H3f4, Vmn1r218,
## Slc17a2, Prl8a2, Serpinb1c, Akr1c21, Gzma, D13Ertd608e, Gm21188, BC147527,
## S100z, Fam240b, Lrrc74a, Serpina12, Serpina3b, Serpina3m, Gm49366, Mroh2b,
## Glycam1, Gsdmc, Slurp1, Gml, Ly6f, Dcstamp, Aqp6, Foxh1, Krt6a, Krt73, Sstr3,
## Krtap7-1, Tex55, Gpr15, Gm21833, Olfr15, Septin12, Rtp1, Rtp2, Tmem207, Stfa2,
## Muc20, A530064D06Rik, Prss33, Prss30, H2-Ob, Btnl2, Tnfsf14, H2-Q1, Prss34,
## Tpsg1, Ccr6, Nhlrc4, Olfr111, Olfr131, Esp8, Vmn2r93, Vmn2r100, Slc22a7, Tff1,
## Cyp4f40, Olfr55, Olfr63, Olfr93, Glyatl3, Kdm5d, Dsg1b, Spink7, Pcdha6, Pcdha7,
## Pcdhac1, Cplx4, Gpr151, Spink6, Gm50364, 1700019N19Rik, Slc22a12, Opalin,
## Slc22a19, Pga5, Ms4a15, Ms4a4c, Gm8369, Cyp17a1, ENSMUSG00000079800.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Gm15433, Lgsn, Glrp1, Gm39653, Pdcd1,
## Gm3646, Slamf7, Ankar, Slamf8, Ifi209, A630095N17Rik, Marco, Spata3, Olfr1256,
## Olfr32, Mylk2, Fcnb, Rbbp8nl, 4930404H24Rik, Birc7, Bpifb2, Bpifb1, Actl10,
## Lime1, Ptf1a, Duoxa2, Wfdc15b, Zswim2, Ndor1, Fut7, A26c3, Erv3, Pck1, Olfr259,
## Olfr73, Cstl1, Arhgap40, Gm15097, Gm14569, Ezhip, Tcp11x2, 4930513O06Rik,
## Gm10490, Tex13b, Gm29133, 4930558C23Rik, Rpe65, Etv3l, Ttc24, Hsd3b5, Pmp2,
## Slc10a5, Slc7a12, Aadacl2, Sucnr1, Ghsr, Clca4b, Chia1, A630076J17Rik, Lce1e,
## Lce3c, Gm17662, Ldc1, Orm2, Uts2, Hes2, Il22ra1, Srarp, Ccl21d, Sit1, Olfr71,
## Toporsl, Dspp, Pdx1, Pdcl2, Gm8879, Aym1, Tmprss11c, Piwil1, 4930519G04Rik,
## Gm21663, Odaph, Gm1979, Htr5a, Oas1e, Pilrb1, Olfr718-ps1, Il23r, Prss2,
## Slco1a4, Clec4e, Vmn2r24, Vmn2r27, Vmn1r46, V1ra8, Gimap7, Doxl1, Reg3b,
## Klrb1b, Clec2i, Olr1, Klrk1, Klri1, Eif4a3l1, Atoh1, Nat8, Igbp1b, Stra8,
## Olfr460, Gm7298, Trim30b, A26c2, Gm52993, Ceacam18, Cnga4, Cyp2a5, Vmn2r66,
## Vmn2r76, 4930451I11Rik, Gvin1, Olfr472, Nanos2, Nccrp1, Pira2, Pira12,
## Ceacam16, Lilra5, Gfy, Pth2, Irgc1, Vmn2r28, Gm20715, Vmn2r49, Vmn1r84, Cdcp3,
## Ffar3, Acsm1, Mrgpra9, Mrgpra2a, Scgb1b30, Odf3, Olfr558, Olfr560, Olfr571,
## Sult3a1, Rfpl4b, Gm49339, Gm5426, Hsd17b6, Glipr1l1, Taf7l2, Neurog3, Olfr1356,
## Elane, A230072I06Rik, Mgat4d, Spata4, Defb39, Defa21, Hapln4, Ido1, Olfr374,
## Ces2f, Nxnl1, Gm7697, Cd209a, Gm4491, Acod1, Prss52, Slc15a1, Olfr728, Galnt15,
## Olfr1512, 1700024G13Rik, Olfr49, Rgr, Gm8104, Gm8267, Mcpt8, Ctsg, Gzmc, Gzmb,
## Gm8050, Gm3298, Gm8108, Pate13, Cyp19a1, Olfr921, Olfr986, Bcl2a1a, Bsx,
## Cyp1a1, Olfr873, Ccdc153, Ccr8, Mobp, Ccr3, Gm10721, Olfr323, Olfr322, Krt26,
## Krt27, Krt40, Krt33a, Krt35, St6galnac2.1, Fam71b, Gm5431, 9930111J21Rik1,
## Slc13a2, Sebox, Fam187a, E030025P04Rik, 9930022D16Rik, Olfr463, 1700012B07Rik,
## Elobl, 4930544D05Rik, Gpr142, Cd300lb, Cd300c, Cd300ld3, H3f4, Vmn1r218,
## Slc17a2, Prl8a2, Serpinb1c, Akr1c21, Gzma, D13Ertd608e, Gm21188, BC147527,
## S100z, Fam240b, Lrrc74a, Serpina12, Serpina3b, Serpina3m, Gm49366, Mroh2b,
## Glycam1, Gsdmc, Slurp1, Gml, Ly6f, Dcstamp, Aqp6, Foxh1, Krt6a, Krt73, Sstr3,
## Krtap7-1, Tex55, Gpr15, Gm21833, Olfr15, Septin12, Rtp1, Rtp2, Tmem207, Stfa2,
## Muc20, A530064D06Rik, Prss33, Prss30, H2-Ob, Btnl2, Tnfsf14, H2-Q1, Prss34,
## Tpsg1, Ccr6, Nhlrc4, Olfr111, Olfr131, Esp8, Vmn2r93, Vmn2r100, Slc22a7, Tff1,
## Cyp4f40, Olfr55, Olfr63, Olfr93, Glyatl3, Kdm5d, Dsg1b, Spink7, Pcdha6, Pcdha7,
## Pcdhac1, Cplx4, Gpr151, Spink6, Gm50364, 1700019N19Rik, Slc22a12, Opalin,
## Slc22a19, Pga5, Ms4a15, Ms4a4c, Gm8369, Cyp17a1, ENSMUSG00000079800.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Gm15433, Lgsn, Glrp1, Gm39653, Pdcd1,
## Gm3646, Slamf7, Ankar, Slamf8, Ifi209, A630095N17Rik, Marco, Spata3, Olfr1256,
## Olfr32, Mylk2, Fcnb, Rbbp8nl, 4930404H24Rik, Birc7, Bpifb2, Bpifb1, Actl10,
## Lime1, Ptf1a, Duoxa2, Wfdc15b, Zswim2, Ndor1, Fut7, A26c3, Erv3, Pck1, Olfr259,
## Olfr73, Cstl1, Arhgap40, Gm15097, Gm14569, Ezhip, Tcp11x2, 4930513O06Rik,
## Gm10490, Tex13b, Gm29133, 4930558C23Rik, Rpe65, Etv3l, Ttc24, Hsd3b5, Pmp2,
## Slc10a5, Slc7a12, Aadacl2, Sucnr1, Ghsr, Clca4b, Chia1, A630076J17Rik, Lce1e,
## Lce3c, Gm17662, Ldc1, Orm2, Uts2, Hes2, Il22ra1, Srarp, Ccl21d, Sit1, Olfr71,
## Toporsl, Dspp, Pdx1, Pdcl2, Gm8879, Aym1, Tmprss11c, Piwil1, 4930519G04Rik,
## Gm21663, Odaph, Gm1979, Htr5a, Oas1e, Pilrb1, Olfr718-ps1, Il23r, Prss2,
## Slco1a4, Clec4e, Vmn2r24, Vmn2r27, Vmn1r46, V1ra8, Gimap7, Doxl1, Reg3b,
## Klrb1b, Clec2i, Olr1, Klrk1, Klri1, Eif4a3l1, Atoh1, Nat8, Igbp1b, Stra8,
## Olfr460, Gm7298, Trim30b, A26c2, Gm52993, Ceacam18, Cnga4, Cyp2a5, Vmn2r66,
## Vmn2r76, 4930451I11Rik, Gvin1, Olfr472, Nanos2, Nccrp1, Pira2, Pira12,
## Ceacam16, Lilra5, Gfy, Pth2, Irgc1, Vmn2r28, Gm20715, Vmn2r49, Vmn1r84, Cdcp3,
## Ffar3, Acsm1, Mrgpra9, Mrgpra2a, Scgb1b30, Odf3, Olfr558, Olfr560, Olfr571,
## Sult3a1, Rfpl4b, Gm49339, Gm5426, Hsd17b6, Glipr1l1, Taf7l2, Neurog3, Olfr1356,
## Elane, A230072I06Rik, Mgat4d, Spata4, Defb39, Defa21, Hapln4, Ido1, Olfr374,
## Ces2f, Nxnl1, Gm7697, Cd209a, Gm4491, Acod1, Prss52, Slc15a1, Olfr728, Galnt15,
## Olfr1512, 1700024G13Rik, Olfr49, Rgr, Gm8104, Gm8267, Mcpt8, Ctsg, Gzmc, Gzmb,
## Gm8050, Gm3298, Gm8108, Pate13, Cyp19a1, Olfr921, Olfr986, Bcl2a1a, Bsx,
## Cyp1a1, Olfr873, Ccdc153, Ccr8, Mobp, Ccr3, Gm10721, Olfr323, Olfr322, Krt26,
## Krt27, Krt40, Krt33a, Krt35, St6galnac2.1, Fam71b, Gm5431, 9930111J21Rik1,
## Slc13a2, Sebox, Fam187a, E030025P04Rik, 9930022D16Rik, Olfr463, 1700012B07Rik,
## Elobl, 4930544D05Rik, Gpr142, Cd300lb, Cd300c, Cd300ld3, H3f4, Vmn1r218,
## Slc17a2, Prl8a2, Serpinb1c, Akr1c21, Gzma, D13Ertd608e, Gm21188, BC147527,
## S100z, Fam240b, Lrrc74a, Serpina12, Serpina3b, Serpina3m, Gm49366, Mroh2b,
## Glycam1, Gsdmc, Slurp1, Gml, Ly6f, Dcstamp, Aqp6, Foxh1, Krt6a, Krt73, Sstr3,
## Krtap7-1, Tex55, Gpr15, Gm21833, Olfr15, Septin12, Rtp1, Rtp2, Tmem207, Stfa2,
## Muc20, A530064D06Rik, Prss33, Prss30, H2-Ob, Btnl2, Tnfsf14, H2-Q1, Prss34,
## Tpsg1, Ccr6, Nhlrc4, Olfr111, Olfr131, Esp8, Vmn2r93, Vmn2r100, Slc22a7, Tff1,
## Cyp4f40, Olfr55, Olfr63, Olfr93, Glyatl3, Kdm5d, Dsg1b, Spink7, Pcdha6, Pcdha7,
## Pcdhac1, Cplx4, Gpr151, Spink6, Gm50364, 1700019N19Rik, Slc22a12, Opalin,
## Slc22a19, Pga5, Ms4a15, Ms4a4c, Gm8369, Cyp17a1, ENSMUSG00000079800.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Gm15433, Lgsn, Glrp1, Gm39653, Pdcd1,
## Gm3646, Slamf7, Ankar, Slamf8, Ifi209, A630095N17Rik, Marco, Spata3, Olfr1256,
## Olfr32, Mylk2, Fcnb, Rbbp8nl, 4930404H24Rik, Birc7, Bpifb2, Bpifb1, Actl10,
## Lime1, Ptf1a, Duoxa2, Wfdc15b, Zswim2, Ndor1, Fut7, A26c3, Erv3, Pck1, Olfr259,
## Olfr73, Cstl1, Arhgap40, Gm15097, Gm14569, Ezhip, Tcp11x2, 4930513O06Rik,
## Gm10490, Tex13b, Gm29133, 4930558C23Rik, Rpe65, Etv3l, Ttc24, Hsd3b5, Pmp2,
## Slc10a5, Slc7a12, Aadacl2, Sucnr1, Ghsr, Clca4b, Chia1, A630076J17Rik, Lce1e,
## Lce3c, Gm17662, Ldc1, Orm2, Uts2, Hes2, Il22ra1, Srarp, Ccl21d, Sit1, Olfr71,
## Toporsl, Dspp, Pdx1, Pdcl2, Gm8879, Aym1, Tmprss11c, Piwil1, 4930519G04Rik,
## Gm21663, Odaph, Gm1979, Htr5a, Oas1e, Pilrb1, Olfr718-ps1, Il23r, Prss2,
## Slco1a4, Clec4e, Vmn2r24, Vmn2r27, Vmn1r46, V1ra8, Gimap7, Doxl1, Reg3b,
## Klrb1b, Clec2i, Olr1, Klrk1, Klri1, Eif4a3l1, Atoh1, Nat8, Igbp1b, Stra8,
## Olfr460, Gm7298, Trim30b, A26c2, Gm52993, Ceacam18, Cnga4, Cyp2a5, Vmn2r66,
## Vmn2r76, 4930451I11Rik, Gvin1, Olfr472, Nanos2, Nccrp1, Pira2, Pira12,
## Ceacam16, Lilra5, Gfy, Pth2, Irgc1, Vmn2r28, Gm20715, Vmn2r49, Vmn1r84, Cdcp3,
## Ffar3, Acsm1, Mrgpra9, Mrgpra2a, Scgb1b30, Odf3, Olfr558, Olfr560, Olfr571,
## Sult3a1, Rfpl4b, Gm49339, Gm5426, Hsd17b6, Glipr1l1, Taf7l2, Neurog3, Olfr1356,
## Elane, A230072I06Rik, Mgat4d, Spata4, Defb39, Defa21, Hapln4, Ido1, Olfr374,
## Ces2f, Nxnl1, Gm7697, Cd209a, Gm4491, Acod1, Prss52, Slc15a1, Olfr728, Galnt15,
## Olfr1512, 1700024G13Rik, Olfr49, Rgr, Gm8104, Gm8267, Mcpt8, Ctsg, Gzmc, Gzmb,
## Gm8050, Gm3298, Gm8108, Pate13, Cyp19a1, Olfr921, Olfr986, Bcl2a1a, Bsx,
## Cyp1a1, Olfr873, Ccdc153, Ccr8, Mobp, Ccr3, Gm10721, Olfr323, Olfr322, Krt26,
## Krt27, Krt40, Krt33a, Krt35, St6galnac2.1, Fam71b, Gm5431, 9930111J21Rik1,
## Slc13a2, Sebox, Fam187a, E030025P04Rik, 9930022D16Rik, Olfr463, 1700012B07Rik,
## Elobl, 4930544D05Rik, Gpr142, Cd300lb, Cd300c, Cd300ld3, H3f4, Vmn1r218,
## Slc17a2, Prl8a2, Serpinb1c, Akr1c21, Gzma, D13Ertd608e, Gm21188, BC147527,
## S100z, Fam240b, Lrrc74a, Serpina12, Serpina3b, Serpina3m, Gm49366, Mroh2b,
## Glycam1, Gsdmc, Slurp1, Gml, Ly6f, Dcstamp, Aqp6, Foxh1, Krt6a, Krt73, Sstr3,
## Krtap7-1, Tex55, Gpr15, Gm21833, Olfr15, Septin12, Rtp1, Rtp2, Tmem207, Stfa2,
## Muc20, A530064D06Rik, Prss33, Prss30, H2-Ob, Btnl2, Tnfsf14, H2-Q1, Prss34,
## Tpsg1, Ccr6, Nhlrc4, Olfr111, Olfr131, Esp8, Vmn2r93, Vmn2r100, Slc22a7, Tff1,
## Cyp4f40, Olfr55, Olfr63, Olfr93, Glyatl3, Kdm5d, Dsg1b, Spink7, Pcdha6, Pcdha7,
## Pcdhac1, Cplx4, Gpr151, Spink6, Gm50364, 1700019N19Rik, Slc22a12, Opalin,
## Slc22a19, Pga5, Ms4a15, Ms4a4c, Gm8369, Cyp17a1, ENSMUSG00000079800.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Gm15433, Lgsn, Glrp1, Gm39653, Pdcd1,
## Gm3646, Slamf7, Ankar, Slamf8, Ifi209, A630095N17Rik, Marco, Spata3, Olfr1256,
## Olfr32, Mylk2, Fcnb, Rbbp8nl, 4930404H24Rik, Birc7, Bpifb2, Bpifb1, Actl10,
## Lime1, Ptf1a, Duoxa2, Wfdc15b, Zswim2, Ndor1, Fut7, A26c3, Erv3, Pck1, Olfr259,
## Olfr73, Cstl1, Arhgap40, Gm15097, Gm14569, Ezhip, Tcp11x2, 4930513O06Rik,
## Gm10490, Tex13b, Gm29133, 4930558C23Rik, Rpe65, Etv3l, Ttc24, Hsd3b5, Pmp2,
## Slc10a5, Slc7a12, Aadacl2, Sucnr1, Ghsr, Clca4b, Chia1, A630076J17Rik, Lce1e,
## Lce3c, Gm17662, Ldc1, Orm2, Uts2, Hes2, Il22ra1, Srarp, Ccl21d, Sit1, Olfr71,
## Toporsl, Dspp, Pdx1, Pdcl2, Gm8879, Aym1, Tmprss11c, Piwil1, 4930519G04Rik,
## Gm21663, Odaph, Gm1979, Htr5a, Oas1e, Pilrb1, Olfr718-ps1, Il23r, Prss2,
## Slco1a4, Clec4e, Vmn2r24, Vmn2r27, Vmn1r46, V1ra8, Gimap7, Doxl1, Reg3b,
## Klrb1b, Clec2i, Olr1, Klrk1, Klri1, Eif4a3l1, Atoh1, Nat8, Igbp1b, Stra8,
## Olfr460, Gm7298, Trim30b, A26c2, Gm52993, Ceacam18, Cnga4, Cyp2a5, Vmn2r66,
## Vmn2r76, 4930451I11Rik, Gvin1, Olfr472, Nanos2, Nccrp1, Pira2, Pira12,
## Ceacam16, Lilra5, Gfy, Pth2, Irgc1, Vmn2r28, Gm20715, Vmn2r49, Vmn1r84, Cdcp3,
## Ffar3, Acsm1, Mrgpra9, Mrgpra2a, Scgb1b30, Odf3, Olfr558, Olfr560, Olfr571,
## Sult3a1, Rfpl4b, Gm49339, Gm5426, Hsd17b6, Glipr1l1, Taf7l2, Neurog3, Olfr1356,
## Elane, A230072I06Rik, Mgat4d, Spata4, Defb39, Defa21, Hapln4, Ido1, Olfr374,
## Ces2f, Nxnl1, Gm7697, Cd209a, Gm4491, Acod1, Prss52, Slc15a1, Olfr728, Galnt15,
## Olfr1512, 1700024G13Rik, Olfr49, Rgr, Gm8104, Gm8267, Mcpt8, Ctsg, Gzmc, Gzmb,
## Gm8050, Gm3298, Gm8108, Pate13, Cyp19a1, Olfr921, Olfr986, Bcl2a1a, Bsx,
## Cyp1a1, Olfr873, Ccdc153, Ccr8, Mobp, Ccr3, Gm10721, Olfr323, Olfr322, Krt26,
## Krt27, Krt40, Krt33a, Krt35, St6galnac2.1, Fam71b, Gm5431, 9930111J21Rik1,
## Slc13a2, Sebox, Fam187a, E030025P04Rik, 9930022D16Rik, Olfr463, 1700012B07Rik,
## Elobl, 4930544D05Rik, Gpr142, Cd300lb, Cd300c, Cd300ld3, H3f4, Vmn1r218,
## Slc17a2, Prl8a2, Serpinb1c, Akr1c21, Gzma, D13Ertd608e, Gm21188, BC147527,
## S100z, Fam240b, Lrrc74a, Serpina12, Serpina3b, Serpina3m, Gm49366, Mroh2b,
## Glycam1, Gsdmc, Slurp1, Gml, Ly6f, Dcstamp, Aqp6, Foxh1, Krt6a, Krt73, Sstr3,
## Krtap7-1, Tex55, Gpr15, Gm21833, Olfr15, Septin12, Rtp1, Rtp2, Tmem207, Stfa2,
## Muc20, A530064D06Rik, Prss33, Prss30, H2-Ob, Btnl2, Tnfsf14, H2-Q1, Prss34,
## Tpsg1, Ccr6, Nhlrc4, Olfr111, Olfr131, Esp8, Vmn2r93, Vmn2r100, Slc22a7, Tff1,
## Cyp4f40, Olfr55, Olfr63, Olfr93, Glyatl3, Kdm5d, Dsg1b, Spink7, Pcdha6, Pcdha7,
## Pcdhac1, Cplx4, Gpr151, Spink6, Gm50364, 1700019N19Rik, Slc22a12, Opalin,
## Slc22a19, Pga5, Ms4a15, Ms4a4c, Gm8369, Cyp17a1, ENSMUSG00000079800.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
# Summarise the parameter sweep held in sweep.res.list. GT = FALSE: no
# ground-truth doublet labels are passed to the summary.
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## Loading required package: ROCR
# Table with (at least) the columns pK and BCmetric, which are read below.
bcmvn <- find.pK(sweep.stats)

## NULL
# pK plot with numbers

# Going through as.character() first means that, if bcmvn$pK is a factor, its
# labels (and not the integer level codes) are what gets converted to numbers.
pK <- as.numeric(as.character(bcmvn$pK))
BCmetric <- bcmvn$BCmetric
# which.max() returns exactly one index (the first maximum), so pK_choose is
# always a single value even if several pK share the highest BCmetric.
pK_choose <- pK[which.max(BCmetric)]

# Widen the margins for this plot only; the previous settings are restored
# once the annotations have been drawn.
old_par <- par(mar = c(5, 4, 4, 8) + 1, cex.main = 1.2, font.main = 2)
plot(x = pK, y = BCmetric, pch = 16, type = "b",
     col = "blue", lty = 1)
abline(v = pK_choose, lwd = 2, col = "red", lty = 2)
title("The BCmvn distributions")
text(pK_choose, max(BCmetric), as.character(pK_choose), pos = 4, col = "red")
par(old_par)

# pK used by the first doubletFinder() pass below.
pK_value <- pK_choose

# Homotypic Doublet Proportion Estimate

# Use the cluster labels of the object that is actually being classified
# (EmbryoLOF), so that the homotypic proportion and the expected doublet count
# below are derived from the same set of cells.
## ex: annotations <- seu_kidney@meta.data$ClusteringResults
homotypic.prop <- modelHomotypic(EmbryoLOF@meta.data$RNA_snn_res.0.35)
# Assuming a 9% doublet formation rate - tailor for your dataset
nExp_poi <- round(0.09 * nrow(EmbryoLOF@meta.data))
# Expected doublet count after discounting the estimated homotypic fraction.
nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))

# First pass: reuse.pANN = FALSE, so pANN is computed here; cells are
# classified against the unadjusted expectation nExp_poi.
EmbryoLOF <- doubletFinder(EmbryoLOF, PCs = 1:20,
                           pN = 0.25, pK = pK_value,
                           nExp = nExp_poi, reuse.pANN = FALSE,
                           sct = FALSE)
## [1] "Creating 3351 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Gm15433, Lgsn, Glrp1, Gm39653, Pdcd1,
## Gm3646, Slamf7, Ankar, Slamf8, Ifi209, A630095N17Rik, Marco, Spata3, Olfr1256,
## Olfr32, Mylk2, Fcnb, Rbbp8nl, 4930404H24Rik, Birc7, Bpifb2, Bpifb1, Actl10,
## Lime1, Ptf1a, Duoxa2, Wfdc15b, Zswim2, Ndor1, Fut7, A26c3, Erv3, Pck1, Olfr259,
## Olfr73, Cstl1, Arhgap40, Gm15097, Gm14569, Ezhip, Tcp11x2, 4930513O06Rik,
## Gm10490, Tex13b, Gm29133, 4930558C23Rik, Rpe65, Etv3l, Ttc24, Hsd3b5, Pmp2,
## Slc10a5, Slc7a12, Aadacl2, Sucnr1, Ghsr, Clca4b, Chia1, A630076J17Rik, Lce1e,
## Lce3c, Gm17662, Ldc1, Orm2, Uts2, Hes2, Il22ra1, Srarp, Ccl21d, Sit1, Olfr71,
## Toporsl, Dspp, Pdx1, Pdcl2, Gm8879, Aym1, Tmprss11c, Piwil1, 4930519G04Rik,
## Gm21663, Odaph, Gm1979, Htr5a, Oas1e, Pilrb1, Olfr718-ps1, Il23r, Prss2,
## Slco1a4, Clec4e, Vmn2r24, Vmn2r27, Vmn1r46, V1ra8, Gimap7, Doxl1, Reg3b,
## Klrb1b, Clec2i, Olr1, Klrk1, Klri1, Eif4a3l1, Atoh1, Nat8, Igbp1b, Stra8,
## Olfr460, Gm7298, Trim30b, A26c2, Gm52993, Ceacam18, Cnga4, Cyp2a5, Vmn2r66,
## Vmn2r76, 4930451I11Rik, Gvin1, Olfr472, Nanos2, Nccrp1, Pira2, Pira12,
## Ceacam16, Lilra5, Gfy, Pth2, Irgc1, Vmn2r28, Gm20715, Vmn2r49, Vmn1r84, Cdcp3,
## Ffar3, Acsm1, Mrgpra9, Mrgpra2a, Scgb1b30, Odf3, Olfr558, Olfr560, Olfr571,
## Sult3a1, Rfpl4b, Gm49339, Gm5426, Hsd17b6, Glipr1l1, Taf7l2, Neurog3, Olfr1356,
## Elane, A230072I06Rik, Mgat4d, Spata4, Defb39, Defa21, Hapln4, Ido1, Olfr374,
## Ces2f, Nxnl1, Gm7697, Cd209a, Gm4491, Acod1, Prss52, Slc15a1, Olfr728, Galnt15,
## Olfr1512, 1700024G13Rik, Olfr49, Rgr, Gm8104, Gm8267, Mcpt8, Ctsg, Gzmc, Gzmb,
## Gm8050, Gm3298, Gm8108, Pate13, Cyp19a1, Olfr921, Olfr986, Bcl2a1a, Bsx,
## Cyp1a1, Olfr873, Ccdc153, Ccr8, Mobp, Ccr3, Gm10721, Olfr323, Olfr322, Krt26,
## Krt27, Krt40, Krt33a, Krt35, St6galnac2.1, Fam71b, Gm5431, 9930111J21Rik1,
## Slc13a2, Sebox, Fam187a, E030025P04Rik, 9930022D16Rik, Olfr463, 1700012B07Rik,
## Elobl, 4930544D05Rik, Gpr142, Cd300lb, Cd300c, Cd300ld3, H3f4, Vmn1r218,
## Slc17a2, Prl8a2, Serpinb1c, Akr1c21, Gzma, D13Ertd608e, Gm21188, BC147527,
## S100z, Fam240b, Lrrc74a, Serpina12, Serpina3b, Serpina3m, Gm49366, Mroh2b,
## Glycam1, Gsdmc, Slurp1, Gml, Ly6f, Dcstamp, Aqp6, Foxh1, Krt6a, Krt73, Sstr3,
## Krtap7-1, Tex55, Gpr15, Gm21833, Olfr15, Septin12, Rtp1, Rtp2, Tmem207, Stfa2,
## Muc20, A530064D06Rik, Prss33, Prss30, H2-Ob, Btnl2, Tnfsf14, H2-Q1, Prss34,
## Tpsg1, Ccr6, Nhlrc4, Olfr111, Olfr131, Esp8, Vmn2r93, Vmn2r100, Slc22a7, Tff1,
## Cyp4f40, Olfr55, Olfr63, Olfr93, Glyatl3, Kdm5d, Dsg1b, Spink7, Pcdha6, Pcdha7,
## Pcdhac1, Cplx4, Gpr151, Spink6, Gm50364, 1700019N19Rik, Slc22a12, Opalin,
## Slc22a19, Pga5, Ms4a15, Ms4a4c, Gm8369, Cyp17a1, ENSMUSG00000079800.
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
names(EmbryoLOF@meta.data[length(EmbryoLOF@meta.data)])
## [1] "DF.classifications_0.25_0.14_905"
# Second pass: re-classify with the homotypic-adjusted expectation.
# reuse.pANN has to name the column holding the pANN scores. The last metadata
# column (printed above) is the DF.classifications_* label column, so it must
# not be passed here; the most recently added pANN_* column is looked up
# instead.
pann_cols <- grep("^pANN_", colnames(EmbryoLOF@meta.data), value = TRUE)
stopifnot(length(pann_cols) >= 1L)
# NOTE(review): with reuse.pANN supplied, DoubletFinder appears to re-threshold
# the stored scores rather than recompute them, so PCs is only kept equal to
# the first pass (1:20) for consistency - confirm for the installed version.
EmbryoLOF <- doubletFinder(EmbryoLOF, PCs = 1:20,
                           pN = 0.25, pK = pK_choose, # Get PK value from plot metric above
                           nExp = nExp_poi.adj,
                           reuse.pANN = pann_cols[length(pann_cols)],
                           sct = FALSE)


# UMAP coloured by the last metadata column of EmbryoLOF (its name is printed
# at the end of this block).
p1 <- DimPlot(
  EmbryoLOF,
  reduction = "umap",
  group.by = colnames(EmbryoLOF@meta.data)[ncol(EmbryoLOF@meta.data)],
  cols = c("red", "grey"),
  pt.size = 0.3
)

# Same embedding coloured by the number of detected features per cell.
p2 <- FeaturePlot(EmbryoLOF, features = "nFeature_RNA", reduction = "umap")

# Show both panels together.
p1 + p2

# Name of the last metadata column.
colnames(EmbryoLOF@meta.data)[ncol(EmbryoLOF@meta.data)]
## [1] "DF.classifications_0.25_0.14_789"
# It does not look like it is telling the doublets apart in most cases; it looks like an overestimate.
# Most doublets must already have been removed in the main Liver analysis by HTO selection.

# Composition of the last metadata column within each res-0.35 cluster.
dittoBarPlot(
  EmbryoLOF,
  var = colnames(EmbryoLOF@meta.data)[ncol(EmbryoLOF@meta.data)],
  group.by = "RNA_snn_res.0.35"
)

# Ptprc expression per cluster, legend hidden.
VlnPlot(
  EmbryoLOF,
  features = "Ptprc",
  group.by = "RNA_snn_res.0.35",
  pt.size = 0.05
) +
  theme(legend.position = "none")

# Keep only the cells whose last metadata column equals "Singlet".
EmbryoLOF <- EmbryoLOF[, which(EmbryoLOF@meta.data[[ncol(EmbryoLOF@meta.data)]] == "Singlet")]

Perform Doublet removal on EmbryoN123 Wildtype

# DoubletFinder. Until we have clear cluster identities, it is better not to use DoubletFinder.

# I will split the dataset into Wt and LOF because, otherwise, fake doublets would be produced.


# Pick 2000 variable features for the wild-type object with the "vst" method.
EmbryoWt <- FindVariableFeatures(
  object = EmbryoWt,
  nfeatures = 2000,
  selection.method = "vst"
)
## Finding variable features for layer counts.2
# First ten entries of the variable-feature list.
top10 <- head(x = VariableFeatures(object = EmbryoWt), n = 10)
print(top10)
##  [1] "Actc1" "Mylpf" "Ttn"   "Nefm"  "C1qb"  "Myl1"  "Apoe"  "Myog"  "Acta2"
## [10] "Tnnc1"
# Scale every feature of the object, not only the variable ones.
all.genes <- rownames(x = EmbryoWt)
EmbryoWt <- ScaleData(object = EmbryoWt, features = all.genes)
## Centering and scaling data matrix
## Warning: Different features in new layer data than already exists for
## scale.data
## Perform linear dimensional reduction

# PCA computed on the variable features only.
EmbryoWt <- RunPCA(
  object = EmbryoWt,
  features = VariableFeatures(object = EmbryoWt)
)
## PC_ 1 
## Positive:  Lgals1, Gpc3, S100a11, Sparc, Col1a2, Col3a1, Fbn2, Cald1, Tpm1, Egr1 
##     Fos, Col1a1, Id3, Jun, Fn1, Anxa2, Hmga2, Col5a2, Cnn2, Cdh11 
##     Mest, Twist1, Dlc1, Prrx1, Ldha, Peg3, Itm2a, Cyba, Id1, Hmcn1 
## Negative:  Rtn1, Tubb3, Stmn3, Map2, Dcx, Crmp1, Elavl3, Kif5c, Tagln3, Stmn2 
##     Ina, Myt1l, Tubb2b, Akap6, Rims2, Elavl2, Nsg2, Rab3c, Mllt11, Cadps 
##     Ctnna2, Elavl4, Celf4, Tuba1a, Nrxn1, Dpysl3, Mapt, H2bu2, Igfbpl1, Map1b 
## PC_ 2 
## Positive:  Rac2, Tyrobp, Fcer1g, Laptm5, Spi1, Gmfg, Lcp1, Lst1, Ptprc, Psmb8 
##     Cyth4, Ptpn18, Hcls1, Nrros, Ptpn6, Inpp5d, Dock8, Was, Cd53, Samsn1 
##     Coro1a, Cd52, Ms4a6c, Fcgr3, Arhgap30, Plek, Ncf2, Gpr65, Dock2, Fermt3 
## Negative:  Gpc3, Fbn2, Col1a2, Rbms3, Col3a1, Cdh11, Fbxl7, Col1a1, Peg3, Gpc6 
##     Cdkn1c, Aff3, Cald1, Ptprd, Bnc2, Nfib, Fn1, Col5a2, Adgrl3, Ptn 
##     Tpm1, Vcan, Nfia, Ror1, Zfhx4, Sparc, Rora, Tenm3, Prrx1, Tshz2 
## PC_ 3 
## Positive:  Rbms3, Col1a1, Col1a2, Col3a1, Tshz2, Dab2, S100a11, Tanc2, Fos, Lgals1 
##     Maf, Egr1, Colec12, Lst1, Ptprd, Tyrobp, Fbn2, Fcgrt, Fcer1g, Gpc3 
##     Akap12, Igf1, Nav3, Fosb, Junb, Tpm1, Zfp36, Dlc1, Bnc2, Laptm5 
## Negative:  Hmgb2, Top2a, Cdca8, Pbk, Cenpf, Prc1, Nusap1, Spc25, H1f5, Ube2c 
##     H3c3, Tpx2, Kif11, Smc4, Cks2, H2ac24, Mki67, Cdca3, Pclaf, H2ac8 
##     H2ac4, Birc5, H1f4, H4c4, Cenpe, Ckap2l, H2bc14, Knl1, H2bc18, H2bc11 
## PC_ 4 
## Positive:  Aff3, Gpc6, Top2a, Nusap1, Mki67, Kif11, Incenp, Aurkb, Ckap2l, H2ax 
##     Robo2, Prrx1, Kif23, Mis18bp1, Cdh11, H2bc18, Prc1, Fbxl7, H2ac4, H3c3 
##     Twist1, Aff2, Knl1, Kif20b, Smc4, Adgrl3, H2ac8, Spc25, H4c8, H4c4 
## Negative:  Actc1, Myog, Ttn, Neb, Tnnt2, Myod1, Mymk, Klhl41, Mymx, Des 
##     Tnnt1, Atp2a1, Chrna1, Mylk4, Fitm1, Myl1, Trim55, Tnni1, Mylpf, Mrln 
##     Synpo2l, Smyd1, Myh3, Cryab, Chrng, Tnnc1, Cdkn1a, Vgll2, Rbm24, Myl4 
## PC_ 5 
## Positive:  Top2a, Kif11, Prc1, Nusap1, Mki67, Diaph3, Incenp, Aurkb, Mis18bp1, Spc25 
##     Kif4, Ckap2l, Kif23, Knl1, H2ac4, Cdk1, Sgo2a, Cenpe, Hmmr, H3c3 
##     Smc4, Ect2, H2ac24, H2ac8, Kif20b, Tpx2, Fbxo5, H2bc18, Cenpf, H1f5 
## Negative:  Gadd45g, Igsf8, Nhlh1, Rnd2, St18, Srrm4, Hes6, Insm1, Klhl35, Rhbdl3 
##     Ckb, Neurog2, Prox1, Ppp1r14a, Eya2, Neurod1, Pou3f2, Nhlh2, Igfbpl1, Lmo1 
##     Tubb3, Neurod4, Tagln3, Ascl1, Gpx1, Zbtb18, Ccnd1, Igfbp2, Msx3, Ebf2
# Elbow plot of the first 30 PCs, used to judge how many PCs to keep.
ElbowPlot(EmbryoWt, ndims = 30)

# NOTE(review): the earlier note at this point referred to "Liver3_2Jul21" and
# 16 dimensions; the steps below use the first 20 PCs for this dataset -
# confirm that cut-off against the elbow plot above.

# Cluster the cells

EmbryoWt <- FindNeighbors(EmbryoWt, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
# Cluster at three resolutions; the 0.35 clustering is the one plotted below
# (metadata column "RNA_snn_res.0.35"). FALSE is spelled out because the
# shorthand F is an ordinary variable that can be reassigned.
EmbryoWt <- FindClusters(EmbryoWt, resolution = 0.1, verbose = FALSE)
EmbryoWt <- FindClusters(EmbryoWt, resolution = 0.2, verbose = FALSE)
EmbryoWt <- FindClusters(EmbryoWt, resolution = 0.35, verbose = FALSE)

# Run UMAP on the same 20 PCs used for the neighbour graph

EmbryoWt <- RunUMAP(EmbryoWt, dims = 1:20, verbose = FALSE)
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
DimPlot(EmbryoWt, group.by = "RNA_snn_res.0.35")

# DoubletFinder can be broken up into 4 steps:
#   (1) Generate artificial doublets from existing scRNA-seq data
#   (2) Pre-process merged real-artificial data
#   (3) Perform PCA and use the PC distance matrix to find each cell’s
#       proportion of artificial k nearest neighbors (pANN)
#   (4) Rank order and threshold pANN values according to the expected number
#       of doublets

# pK Identification (no ground-truth)
sweep.res.list <- paramSweep(
  EmbryoWt,
  PCs = seq_len(20),
  sct = FALSE
)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Becn2, Mettl21e, Teddm1b, A030014E15Rik,
## Krtap28-13, Ccl20, Xkr9, C4bp, Pigr, Fmo3, Cryga, Gm4846, Gal3st2b, Fcrlb,
## Gpr25, Serpinb11, Cfhr4, Ifi208, Ifi202b, Gm14444, 2210418O10Rik, Gm14409,
## Defb45, Defb36, 4933409G03Rik, Bpifb9a, Tgm7, Gm15557, Gtsf1l, Spdye4c,
## Pin1rt1, Gm11060, 1700021F07Rik, Pramel7, 1700010B08Rik, Tmco5, Abo, Spin2d,
## Gm18336, Trex2, Gm9, Rhox4a2, Rhox2b, Rhox4c, Rhox2g, Rhox2h, 2010106E10Rik,
## Ube2dnl1, Cldn34c2, Akap4, Opn1mw, Kir3dl2, Serpina7, 4933428M09Rik, Vgll1,
## Gm15262, Cldn34d, 1700020N15Rik, Ssxb5, Gm6592, H2ap, H2al3, Slc6a14, Gm9112,
## Cdx4, Rhox6, Rhox8, Arl14, Cd5l, Gja8, Gm21962, Adh6a, Clca3a2, Clca3b, Gm1527,
## Lrriq4, Lrrc31, Lce1g, Mup9, Mup17, Slc2a7, Skint8, Skint9, Zpld2, Gm13288,
## Cyp2j5, Slfnl1, 1700012P22Rik, Gjb4, Mup2, Gm13306, Olfr156, Olfr272, Pla2g2f,
## Pla2g2e, Ocm, 1700001J03Rik, Cdx2, Speer4c, Gnrhr, Tmprss11b, Tmprss11e,
## Gm28434, Gm21698, Gm10220, Gm7361, Dthd1, Sds, Tmem270, Gal3st4, Drd5, Tcf23,
## Nobox, Smco2, Clec4f, Klrb1, Clec2e, Clec12b, Gm6619, Vmn1r15, Vmn1r21, Rnf133,
## Hyal4, Nat8f7, Vmn1r33, Figla, Olfr675, Sult2a1, Klk11, Klk5, Klk15, Cyp2f2,
## Klk1b24, Crx, Olfr694, Mill1, AU018091, Apoc2, Vmn1r55, Olfr520, Vmn2r54,
## Cuzd1, Cnfn, Tex36, Mrgprx1, 1700015G11Rik, Scnn1g, Muc5b, Vmn1r85, Lyz1,
## Gm7075, Anapc15-ps, Rfx6, Rdh1, Rdh16f1, Mettl7b, Prf1, Taar4, Gstt4, Casp14,
## Gm4767, 4932415D10Rik, 2310057J18Rik, Atp4b, Cldn24, BB014433, 4930467E23Rik,
## Gm31371, Defb2, Anxa10, Sgo2b, Slc38a8, Gm10663, Nkx6-3, Ces1c, Ces1e, Cd209c,
## Gm49320, Slc10a2, Olfr372, Cib3, Chrnb3, Gm10358, Gm10999, Trhr2, Defb48,
## Chrna2, Synb, Olfr727, Ang2, Gm21750, Gm49387, Fam170b, Npy4r, 9230112D13Rik,
## Mat1a, Sftpa1, Gm8126, Cma1, Olfr720, Gm2916, Ankk1, Pou2af1, Olfr912, Olfr967,
## Olfr976, Trim43b, Tbc1d21, Apoa5, Prss45, Fam240a, Olfr224, Krt12, Krt39,
## Krt31, Krt42, Smim23, BC053393, Havcr1, Olfr1396, Olfr10, Olfr1388, Ppy,
## Olfr1378, Tspan10, Rnf222, Il5, Spaca3, Tex19.1, Alox12b, Slfn4, Lypd8, Npc1l1,
## Ush1g, Slc16a5, H1f9, Olfr394, Vmn1r197, Naip1, Prl3b1, Prl5a1, Akr1c18,
## Zfp998, Olfr1535, Cox8c, Gphb5, Acot5, Cyp2d9, Tas2r119, Ly6m, Ly6g2, Odf1,
## Gm7489, Tex33, Krt72, Gm10337, Smgc, Spatc1, Krt86, Slc9c1, Dppa4, Tssk1,
## Itgb2l, A730009L09Rik, Krtap26-1, Fetub, Stfa2l1, Dcpp2, Btnl4, Cyp21a1,
## Ly6g5c, Cdsn, 2300002M23Rik, Gm8909, H2-T3, Tcp10b, Olfr107, Crisp3, Crisp1,
## Fpr2, Vmn2r92, Vmn1r224, Vmn1r229, Clpsl2, Gm17657, Glp1r, Tff2, Cyp4f37,
## 2410137M14Rik, Gm35060, Wnt8a, 1700034E13Rik, Gykl1, Hrh4, Lyzl1, Lipf,
## Cyp2c39, Slc22a30, Cblif, Olfr1419, Glyat, Cntf, Cabp2.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Becn2, Mettl21e, Teddm1b, A030014E15Rik,
## Krtap28-13, Ccl20, Xkr9, C4bp, Pigr, Fmo3, Cryga, Gm4846, Gal3st2b, Fcrlb,
## Gpr25, Serpinb11, Cfhr4, Ifi208, Ifi202b, Gm14444, 2210418O10Rik, Gm14409,
## Defb45, Defb36, 4933409G03Rik, Bpifb9a, Tgm7, Gm15557, Gtsf1l, Spdye4c,
## Pin1rt1, Gm11060, 1700021F07Rik, Pramel7, 1700010B08Rik, Tmco5, Abo, Spin2d,
## Gm18336, Trex2, Gm9, Rhox4a2, Rhox2b, Rhox4c, Rhox2g, Rhox2h, 2010106E10Rik,
## Ube2dnl1, Cldn34c2, Akap4, Opn1mw, Kir3dl2, Serpina7, 4933428M09Rik, Vgll1,
## Gm15262, Cldn34d, 1700020N15Rik, Ssxb5, Gm6592, H2ap, H2al3, Slc6a14, Gm9112,
## Cdx4, Rhox6, Rhox8, Arl14, Cd5l, Gja8, Gm21962, Adh6a, Clca3a2, Clca3b, Gm1527,
## Lrriq4, Lrrc31, Lce1g, Mup9, Mup17, Slc2a7, Skint8, Skint9, Zpld2, Gm13288,
## Cyp2j5, Slfnl1, 1700012P22Rik, Gjb4, Mup2, Gm13306, Olfr156, Olfr272, Pla2g2f,
## Pla2g2e, Ocm, 1700001J03Rik, Cdx2, Speer4c, Gnrhr, Tmprss11b, Tmprss11e,
## Gm28434, Gm21698, Gm10220, Gm7361, Dthd1, Sds, Tmem270, Gal3st4, Drd5, Tcf23,
## Nobox, Smco2, Clec4f, Klrb1, Clec2e, Clec12b, Gm6619, Vmn1r15, Vmn1r21, Rnf133,
## Hyal4, Nat8f7, Vmn1r33, Figla, Olfr675, Sult2a1, Klk11, Klk5, Klk15, Cyp2f2,
## Klk1b24, Crx, Olfr694, Mill1, AU018091, Apoc2, Vmn1r55, Olfr520, Vmn2r54,
## Cuzd1, Cnfn, Tex36, Mrgprx1, 1700015G11Rik, Scnn1g, Muc5b, Vmn1r85, Lyz1,
## Gm7075, Anapc15-ps, Rfx6, Rdh1, Rdh16f1, Mettl7b, Prf1, Taar4, Gstt4, Casp14,
## Gm4767, 4932415D10Rik, 2310057J18Rik, Atp4b, Cldn24, BB014433, 4930467E23Rik,
## Gm31371, Defb2, Anxa10, Sgo2b, Slc38a8, Gm10663, Nkx6-3, Ces1c, Ces1e, Cd209c,
## Gm49320, Slc10a2, Olfr372, Cib3, Chrnb3, Gm10358, Gm10999, Trhr2, Defb48,
## Chrna2, Synb, Olfr727, Ang2, Gm21750, Gm49387, Fam170b, Npy4r, 9230112D13Rik,
## Mat1a, Sftpa1, Gm8126, Cma1, Olfr720, Gm2916, Ankk1, Pou2af1, Olfr912, Olfr967,
## Olfr976, Trim43b, Tbc1d21, Apoa5, Prss45, Fam240a, Olfr224, Krt12, Krt39,
## Krt31, Krt42, Smim23, BC053393, Havcr1, Olfr1396, Olfr10, Olfr1388, Ppy,
## Olfr1378, Tspan10, Rnf222, Il5, Spaca3, Tex19.1, Alox12b, Slfn4, Lypd8, Npc1l1,
## Ush1g, Slc16a5, H1f9, Olfr394, Vmn1r197, Naip1, Prl3b1, Prl5a1, Akr1c18,
## Zfp998, Olfr1535, Cox8c, Gphb5, Acot5, Cyp2d9, Tas2r119, Ly6m, Ly6g2, Odf1,
## Gm7489, Tex33, Krt72, Gm10337, Smgc, Spatc1, Krt86, Slc9c1, Dppa4, Tssk1,
## Itgb2l, A730009L09Rik, Krtap26-1, Fetub, Stfa2l1, Dcpp2, Btnl4, Cyp21a1,
## Ly6g5c, Cdsn, 2300002M23Rik, Gm8909, H2-T3, Tcp10b, Olfr107, Crisp3, Crisp1,
## Fpr2, Vmn2r92, Vmn1r224, Vmn1r229, Clpsl2, Gm17657, Glp1r, Tff2, Cyp4f37,
## 2410137M14Rik, Gm35060, Wnt8a, 1700034E13Rik, Gykl1, Hrh4, Lyzl1, Lipf,
## Cyp2c39, Slc22a30, Cblif, Olfr1419, Glyat, Cntf, Cabp2.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Becn2, Mettl21e, Teddm1b, A030014E15Rik,
## Krtap28-13, Ccl20, Xkr9, C4bp, Pigr, Fmo3, Cryga, Gm4846, Gal3st2b, Fcrlb,
## Gpr25, Serpinb11, Cfhr4, Ifi208, Ifi202b, Gm14444, 2210418O10Rik, Gm14409,
## Defb45, Defb36, 4933409G03Rik, Bpifb9a, Tgm7, Gm15557, Gtsf1l, Spdye4c,
## Pin1rt1, Gm11060, 1700021F07Rik, Pramel7, 1700010B08Rik, Tmco5, Abo, Spin2d,
## Gm18336, Trex2, Gm9, Rhox4a2, Rhox2b, Rhox4c, Rhox2g, Rhox2h, 2010106E10Rik,
## Ube2dnl1, Cldn34c2, Akap4, Opn1mw, Kir3dl2, Serpina7, 4933428M09Rik, Vgll1,
## Gm15262, Cldn34d, 1700020N15Rik, Ssxb5, Gm6592, H2ap, H2al3, Slc6a14, Gm9112,
## Cdx4, Rhox6, Rhox8, Arl14, Cd5l, Gja8, Gm21962, Adh6a, Clca3a2, Clca3b, Gm1527,
## Lrriq4, Lrrc31, Lce1g, Mup9, Mup17, Slc2a7, Skint8, Skint9, Zpld2, Gm13288,
## Cyp2j5, Slfnl1, 1700012P22Rik, Gjb4, Mup2, Gm13306, Olfr156, Olfr272, Pla2g2f,
## Pla2g2e, Ocm, 1700001J03Rik, Cdx2, Speer4c, Gnrhr, Tmprss11b, Tmprss11e,
## Gm28434, Gm21698, Gm10220, Gm7361, Dthd1, Sds, Tmem270, Gal3st4, Drd5, Tcf23,
## Nobox, Smco2, Clec4f, Klrb1, Clec2e, Clec12b, Gm6619, Vmn1r15, Vmn1r21, Rnf133,
## Hyal4, Nat8f7, Vmn1r33, Figla, Olfr675, Sult2a1, Klk11, Klk5, Klk15, Cyp2f2,
## Klk1b24, Crx, Olfr694, Mill1, AU018091, Apoc2, Vmn1r55, Olfr520, Vmn2r54,
## Cuzd1, Cnfn, Tex36, Mrgprx1, 1700015G11Rik, Scnn1g, Muc5b, Vmn1r85, Lyz1,
## Gm7075, Anapc15-ps, Rfx6, Rdh1, Rdh16f1, Mettl7b, Prf1, Taar4, Gstt4, Casp14,
## Gm4767, 4932415D10Rik, 2310057J18Rik, Atp4b, Cldn24, BB014433, 4930467E23Rik,
## Gm31371, Defb2, Anxa10, Sgo2b, Slc38a8, Gm10663, Nkx6-3, Ces1c, Ces1e, Cd209c,
## Gm49320, Slc10a2, Olfr372, Cib3, Chrnb3, Gm10358, Gm10999, Trhr2, Defb48,
## Chrna2, Synb, Olfr727, Ang2, Gm21750, Gm49387, Fam170b, Npy4r, 9230112D13Rik,
## Mat1a, Sftpa1, Gm8126, Cma1, Olfr720, Gm2916, Ankk1, Pou2af1, Olfr912, Olfr967,
## Olfr976, Trim43b, Tbc1d21, Apoa5, Prss45, Fam240a, Olfr224, Krt12, Krt39,
## Krt31, Krt42, Smim23, BC053393, Havcr1, Olfr1396, Olfr10, Olfr1388, Ppy,
## Olfr1378, Tspan10, Rnf222, Il5, Spaca3, Tex19.1, Alox12b, Slfn4, Lypd8, Npc1l1,
## Ush1g, Slc16a5, H1f9, Olfr394, Vmn1r197, Naip1, Prl3b1, Prl5a1, Akr1c18,
## Zfp998, Olfr1535, Cox8c, Gphb5, Acot5, Cyp2d9, Tas2r119, Ly6m, Ly6g2, Odf1,
## Gm7489, Tex33, Krt72, Gm10337, Smgc, Spatc1, Krt86, Slc9c1, Dppa4, Tssk1,
## Itgb2l, A730009L09Rik, Krtap26-1, Fetub, Stfa2l1, Dcpp2, Btnl4, Cyp21a1,
## Ly6g5c, Cdsn, 2300002M23Rik, Gm8909, H2-T3, Tcp10b, Olfr107, Crisp3, Crisp1,
## Fpr2, Vmn2r92, Vmn1r224, Vmn1r229, Clpsl2, Gm17657, Glp1r, Tff2, Cyp4f37,
## 2410137M14Rik, Gm35060, Wnt8a, 1700034E13Rik, Gykl1, Hrh4, Lyzl1, Lipf,
## Cyp2c39, Slc22a30, Cblif, Olfr1419, Glyat, Cntf, Cabp2.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Becn2, Mettl21e, Teddm1b, A030014E15Rik,
## Krtap28-13, Ccl20, Xkr9, C4bp, Pigr, Fmo3, Cryga, Gm4846, Gal3st2b, Fcrlb,
## Gpr25, Serpinb11, Cfhr4, Ifi208, Ifi202b, Gm14444, 2210418O10Rik, Gm14409,
## Defb45, Defb36, 4933409G03Rik, Bpifb9a, Tgm7, Gm15557, Gtsf1l, Spdye4c,
## Pin1rt1, Gm11060, 1700021F07Rik, Pramel7, 1700010B08Rik, Tmco5, Abo, Spin2d,
## Gm18336, Trex2, Gm9, Rhox4a2, Rhox2b, Rhox4c, Rhox2g, Rhox2h, 2010106E10Rik,
## Ube2dnl1, Cldn34c2, Akap4, Opn1mw, Kir3dl2, Serpina7, 4933428M09Rik, Vgll1,
## Gm15262, Cldn34d, 1700020N15Rik, Ssxb5, Gm6592, H2ap, H2al3, Slc6a14, Gm9112,
## Cdx4, Rhox6, Rhox8, Arl14, Cd5l, Gja8, Gm21962, Adh6a, Clca3a2, Clca3b, Gm1527,
## Lrriq4, Lrrc31, Lce1g, Mup9, Mup17, Slc2a7, Skint8, Skint9, Zpld2, Gm13288,
## Cyp2j5, Slfnl1, 1700012P22Rik, Gjb4, Mup2, Gm13306, Olfr156, Olfr272, Pla2g2f,
## Pla2g2e, Ocm, 1700001J03Rik, Cdx2, Speer4c, Gnrhr, Tmprss11b, Tmprss11e,
## Gm28434, Gm21698, Gm10220, Gm7361, Dthd1, Sds, Tmem270, Gal3st4, Drd5, Tcf23,
## Nobox, Smco2, Clec4f, Klrb1, Clec2e, Clec12b, Gm6619, Vmn1r15, Vmn1r21, Rnf133,
## Hyal4, Nat8f7, Vmn1r33, Figla, Olfr675, Sult2a1, Klk11, Klk5, Klk15, Cyp2f2,
## Klk1b24, Crx, Olfr694, Mill1, AU018091, Apoc2, Vmn1r55, Olfr520, Vmn2r54,
## Cuzd1, Cnfn, Tex36, Mrgprx1, 1700015G11Rik, Scnn1g, Muc5b, Vmn1r85, Lyz1,
## Gm7075, Anapc15-ps, Rfx6, Rdh1, Rdh16f1, Mettl7b, Prf1, Taar4, Gstt4, Casp14,
## Gm4767, 4932415D10Rik, 2310057J18Rik, Atp4b, Cldn24, BB014433, 4930467E23Rik,
## Gm31371, Defb2, Anxa10, Sgo2b, Slc38a8, Gm10663, Nkx6-3, Ces1c, Ces1e, Cd209c,
## Gm49320, Slc10a2, Olfr372, Cib3, Chrnb3, Gm10358, Gm10999, Trhr2, Defb48,
## Chrna2, Synb, Olfr727, Ang2, Gm21750, Gm49387, Fam170b, Npy4r, 9230112D13Rik,
## Mat1a, Sftpa1, Gm8126, Cma1, Olfr720, Gm2916, Ankk1, Pou2af1, Olfr912, Olfr967,
## Olfr976, Trim43b, Tbc1d21, Apoa5, Prss45, Fam240a, Olfr224, Krt12, Krt39,
## Krt31, Krt42, Smim23, BC053393, Havcr1, Olfr1396, Olfr10, Olfr1388, Ppy,
## Olfr1378, Tspan10, Rnf222, Il5, Spaca3, Tex19.1, Alox12b, Slfn4, Lypd8, Npc1l1,
## Ush1g, Slc16a5, H1f9, Olfr394, Vmn1r197, Naip1, Prl3b1, Prl5a1, Akr1c18,
## Zfp998, Olfr1535, Cox8c, Gphb5, Acot5, Cyp2d9, Tas2r119, Ly6m, Ly6g2, Odf1,
## Gm7489, Tex33, Krt72, Gm10337, Smgc, Spatc1, Krt86, Slc9c1, Dppa4, Tssk1,
## Itgb2l, A730009L09Rik, Krtap26-1, Fetub, Stfa2l1, Dcpp2, Btnl4, Cyp21a1,
## Ly6g5c, Cdsn, 2300002M23Rik, Gm8909, H2-T3, Tcp10b, Olfr107, Crisp3, Crisp1,
## Fpr2, Vmn2r92, Vmn1r224, Vmn1r229, Clpsl2, Gm17657, Glp1r, Tff2, Cyp4f37,
## 2410137M14Rik, Gm35060, Wnt8a, 1700034E13Rik, Gykl1, Hrh4, Lyzl1, Lipf,
## Cyp2c39, Slc22a30, Cblif, Olfr1419, Glyat, Cntf, Cabp2.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Becn2, Mettl21e, Teddm1b, A030014E15Rik,
## Krtap28-13, Ccl20, Xkr9, C4bp, Pigr, Fmo3, Cryga, Gm4846, Gal3st2b, Fcrlb,
## Gpr25, Serpinb11, Cfhr4, Ifi208, Ifi202b, Gm14444, 2210418O10Rik, Gm14409,
## Defb45, Defb36, 4933409G03Rik, Bpifb9a, Tgm7, Gm15557, Gtsf1l, Spdye4c,
## Pin1rt1, Gm11060, 1700021F07Rik, Pramel7, 1700010B08Rik, Tmco5, Abo, Spin2d,
## Gm18336, Trex2, Gm9, Rhox4a2, Rhox2b, Rhox4c, Rhox2g, Rhox2h, 2010106E10Rik,
## Ube2dnl1, Cldn34c2, Akap4, Opn1mw, Kir3dl2, Serpina7, 4933428M09Rik, Vgll1,
## Gm15262, Cldn34d, 1700020N15Rik, Ssxb5, Gm6592, H2ap, H2al3, Slc6a14, Gm9112,
## Cdx4, Rhox6, Rhox8, Arl14, Cd5l, Gja8, Gm21962, Adh6a, Clca3a2, Clca3b, Gm1527,
## Lrriq4, Lrrc31, Lce1g, Mup9, Mup17, Slc2a7, Skint8, Skint9, Zpld2, Gm13288,
## Cyp2j5, Slfnl1, 1700012P22Rik, Gjb4, Mup2, Gm13306, Olfr156, Olfr272, Pla2g2f,
## Pla2g2e, Ocm, 1700001J03Rik, Cdx2, Speer4c, Gnrhr, Tmprss11b, Tmprss11e,
## Gm28434, Gm21698, Gm10220, Gm7361, Dthd1, Sds, Tmem270, Gal3st4, Drd5, Tcf23,
## Nobox, Smco2, Clec4f, Klrb1, Clec2e, Clec12b, Gm6619, Vmn1r15, Vmn1r21, Rnf133,
## Hyal4, Nat8f7, Vmn1r33, Figla, Olfr675, Sult2a1, Klk11, Klk5, Klk15, Cyp2f2,
## Klk1b24, Crx, Olfr694, Mill1, AU018091, Apoc2, Vmn1r55, Olfr520, Vmn2r54,
## Cuzd1, Cnfn, Tex36, Mrgprx1, 1700015G11Rik, Scnn1g, Muc5b, Vmn1r85, Lyz1,
## Gm7075, Anapc15-ps, Rfx6, Rdh1, Rdh16f1, Mettl7b, Prf1, Taar4, Gstt4, Casp14,
## Gm4767, 4932415D10Rik, 2310057J18Rik, Atp4b, Cldn24, BB014433, 4930467E23Rik,
## Gm31371, Defb2, Anxa10, Sgo2b, Slc38a8, Gm10663, Nkx6-3, Ces1c, Ces1e, Cd209c,
## Gm49320, Slc10a2, Olfr372, Cib3, Chrnb3, Gm10358, Gm10999, Trhr2, Defb48,
## Chrna2, Synb, Olfr727, Ang2, Gm21750, Gm49387, Fam170b, Npy4r, 9230112D13Rik,
## Mat1a, Sftpa1, Gm8126, Cma1, Olfr720, Gm2916, Ankk1, Pou2af1, Olfr912, Olfr967,
## Olfr976, Trim43b, Tbc1d21, Apoa5, Prss45, Fam240a, Olfr224, Krt12, Krt39,
## Krt31, Krt42, Smim23, BC053393, Havcr1, Olfr1396, Olfr10, Olfr1388, Ppy,
## Olfr1378, Tspan10, Rnf222, Il5, Spaca3, Tex19.1, Alox12b, Slfn4, Lypd8, Npc1l1,
## Ush1g, Slc16a5, H1f9, Olfr394, Vmn1r197, Naip1, Prl3b1, Prl5a1, Akr1c18,
## Zfp998, Olfr1535, Cox8c, Gphb5, Acot5, Cyp2d9, Tas2r119, Ly6m, Ly6g2, Odf1,
## Gm7489, Tex33, Krt72, Gm10337, Smgc, Spatc1, Krt86, Slc9c1, Dppa4, Tssk1,
## Itgb2l, A730009L09Rik, Krtap26-1, Fetub, Stfa2l1, Dcpp2, Btnl4, Cyp21a1,
## Ly6g5c, Cdsn, 2300002M23Rik, Gm8909, H2-T3, Tcp10b, Olfr107, Crisp3, Crisp1,
## Fpr2, Vmn2r92, Vmn1r224, Vmn1r229, Clpsl2, Gm17657, Glp1r, Tff2, Cyp4f37,
## 2410137M14Rik, Gm35060, Wnt8a, 1700034E13Rik, Gykl1, Hrh4, Lyzl1, Lipf,
## Cyp2c39, Slc22a30, Cblif, Olfr1419, Glyat, Cntf, Cabp2.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Becn2, Mettl21e, Teddm1b, A030014E15Rik,
## Krtap28-13, Ccl20, Xkr9, C4bp, Pigr, Fmo3, Cryga, Gm4846, Gal3st2b, Fcrlb,
## Gpr25, Serpinb11, Cfhr4, Ifi208, Ifi202b, Gm14444, 2210418O10Rik, Gm14409,
## Defb45, Defb36, 4933409G03Rik, Bpifb9a, Tgm7, Gm15557, Gtsf1l, Spdye4c,
## Pin1rt1, Gm11060, 1700021F07Rik, Pramel7, 1700010B08Rik, Tmco5, Abo, Spin2d,
## Gm18336, Trex2, Gm9, Rhox4a2, Rhox2b, Rhox4c, Rhox2g, Rhox2h, 2010106E10Rik,
## Ube2dnl1, Cldn34c2, Akap4, Opn1mw, Kir3dl2, Serpina7, 4933428M09Rik, Vgll1,
## Gm15262, Cldn34d, 1700020N15Rik, Ssxb5, Gm6592, H2ap, H2al3, Slc6a14, Gm9112,
## Cdx4, Rhox6, Rhox8, Arl14, Cd5l, Gja8, Gm21962, Adh6a, Clca3a2, Clca3b, Gm1527,
## Lrriq4, Lrrc31, Lce1g, Mup9, Mup17, Slc2a7, Skint8, Skint9, Zpld2, Gm13288,
## Cyp2j5, Slfnl1, 1700012P22Rik, Gjb4, Mup2, Gm13306, Olfr156, Olfr272, Pla2g2f,
## Pla2g2e, Ocm, 1700001J03Rik, Cdx2, Speer4c, Gnrhr, Tmprss11b, Tmprss11e,
## Gm28434, Gm21698, Gm10220, Gm7361, Dthd1, Sds, Tmem270, Gal3st4, Drd5, Tcf23,
## Nobox, Smco2, Clec4f, Klrb1, Clec2e, Clec12b, Gm6619, Vmn1r15, Vmn1r21, Rnf133,
## Hyal4, Nat8f7, Vmn1r33, Figla, Olfr675, Sult2a1, Klk11, Klk5, Klk15, Cyp2f2,
## Klk1b24, Crx, Olfr694, Mill1, AU018091, Apoc2, Vmn1r55, Olfr520, Vmn2r54,
## Cuzd1, Cnfn, Tex36, Mrgprx1, 1700015G11Rik, Scnn1g, Muc5b, Vmn1r85, Lyz1,
## Gm7075, Anapc15-ps, Rfx6, Rdh1, Rdh16f1, Mettl7b, Prf1, Taar4, Gstt4, Casp14,
## Gm4767, 4932415D10Rik, 2310057J18Rik, Atp4b, Cldn24, BB014433, 4930467E23Rik,
## Gm31371, Defb2, Anxa10, Sgo2b, Slc38a8, Gm10663, Nkx6-3, Ces1c, Ces1e, Cd209c,
## Gm49320, Slc10a2, Olfr372, Cib3, Chrnb3, Gm10358, Gm10999, Trhr2, Defb48,
## Chrna2, Synb, Olfr727, Ang2, Gm21750, Gm49387, Fam170b, Npy4r, 9230112D13Rik,
## Mat1a, Sftpa1, Gm8126, Cma1, Olfr720, Gm2916, Ankk1, Pou2af1, Olfr912, Olfr967,
## Olfr976, Trim43b, Tbc1d21, Apoa5, Prss45, Fam240a, Olfr224, Krt12, Krt39,
## Krt31, Krt42, Smim23, BC053393, Havcr1, Olfr1396, Olfr10, Olfr1388, Ppy,
## Olfr1378, Tspan10, Rnf222, Il5, Spaca3, Tex19.1, Alox12b, Slfn4, Lypd8, Npc1l1,
## Ush1g, Slc16a5, H1f9, Olfr394, Vmn1r197, Naip1, Prl3b1, Prl5a1, Akr1c18,
## Zfp998, Olfr1535, Cox8c, Gphb5, Acot5, Cyp2d9, Tas2r119, Ly6m, Ly6g2, Odf1,
## Gm7489, Tex33, Krt72, Gm10337, Smgc, Spatc1, Krt86, Slc9c1, Dppa4, Tssk1,
## Itgb2l, A730009L09Rik, Krtap26-1, Fetub, Stfa2l1, Dcpp2, Btnl4, Cyp21a1,
## Ly6g5c, Cdsn, 2300002M23Rik, Gm8909, H2-T3, Tcp10b, Olfr107, Crisp3, Crisp1,
## Fpr2, Vmn2r92, Vmn1r224, Vmn1r229, Clpsl2, Gm17657, Glp1r, Tff2, Cyp4f37,
## 2410137M14Rik, Gm35060, Wnt8a, 1700034E13Rik, Gykl1, Hrh4, Lyzl1, Lipf,
## Cyp2c39, Slc22a30, Cblif, Olfr1419, Glyat, Cntf, Cabp2.
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.001..."
## [1] "pK = 0.005..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
# Summarise the parameter sweep without ground-truth labels (GT = FALSE),
# then derive the per-pK table (pK, BCmetric) used to choose pK.
sweep.stats <- summarizeSweep(sweep.list = sweep.res.list, GT = FALSE)
bcmvn <- find.pK(sweep.stats = sweep.stats)

## NULL
# pK plot with numbers

# Convert via character first so that, if bcmvn$pK is a factor, we recover the
# actual pK values rather than the factor level codes.
pK <- as.numeric(as.character(bcmvn$pK))
BCmetric <- bcmvn$BCmetric
# which.max() always returns a single index, even when the maximum is tied,
# so pK_choose is guaranteed to be a scalar when passed on as `pK`.
pK_choose <- pK[which.max(BCmetric)]

# BCmetric as a function of pK, with the selected pK marked in red.
par(mar = c(5, 4, 4, 8) + 1, cex.main = 1.2, font.main = 2)
plot(x = pK, y = BCmetric, pch = 16, type = "b",
     col = "blue", lty = 1)
abline(v = pK_choose, lwd = 2, col = "red", lty = 2)
title("The BCmvn distributions")
text(pK_choose, max(BCmetric), as.character(pK_choose), pos = 4, col = "red")

# Homotypic Doublet Proportion Estimate

# Use the cluster annotations of the object that is actually passed to
# doubletFinder() below (EmbryoWt), not those of the merged Embryo object.
homotypic.prop <- modelHomotypic(EmbryoWt@meta.data$RNA_snn_res.0.35)
# Assuming a 9% doublet formation rate - tailor for your dataset
nExp_poi <- round(0.09 * nrow(EmbryoWt@meta.data))
# Expected number of doublets after discounting the homotypic proportion.
# NOTE: the call below uses the unadjusted nExp_poi; nExp_poi.adj is computed
# for reference only and is not passed to doubletFinder().
nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))

EmbryoWt <- doubletFinder(EmbryoWt, PCs = 1:20,
                          pN = 0.25, pK = pK_choose,
                          nExp = nExp_poi, reuse.pANN = FALSE,
                          sct = FALSE)
## [1] "Creating 3207 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## Warning in PrepDR5(object = object, features = features, layer = layer, : The
## following features were not available: Becn2, Mettl21e, Teddm1b, A030014E15Rik,
## Krtap28-13, Ccl20, Xkr9, C4bp, Pigr, Fmo3, Cryga, Gm4846, Gal3st2b, Fcrlb,
## Gpr25, Serpinb11, Cfhr4, Ifi208, Ifi202b, Gm14444, 2210418O10Rik, Gm14409,
## Defb45, Defb36, 4933409G03Rik, Bpifb9a, Tgm7, Gm15557, Gtsf1l, Spdye4c,
## Pin1rt1, Gm11060, 1700021F07Rik, Pramel7, 1700010B08Rik, Tmco5, Abo, Spin2d,
## Gm18336, Trex2, Gm9, Rhox4a2, Rhox2b, Rhox4c, Rhox2g, Rhox2h, 2010106E10Rik,
## Ube2dnl1, Cldn34c2, Akap4, Opn1mw, Kir3dl2, Serpina7, 4933428M09Rik, Vgll1,
## Gm15262, Cldn34d, 1700020N15Rik, Ssxb5, Gm6592, H2ap, H2al3, Slc6a14, Gm9112,
## Cdx4, Rhox6, Rhox8, Arl14, Cd5l, Gja8, Gm21962, Adh6a, Clca3a2, Clca3b, Gm1527,
## Lrriq4, Lrrc31, Lce1g, Mup9, Mup17, Slc2a7, Skint8, Skint9, Zpld2, Gm13288,
## Cyp2j5, Slfnl1, 1700012P22Rik, Gjb4, Mup2, Gm13306, Olfr156, Olfr272, Pla2g2f,
## Pla2g2e, Ocm, 1700001J03Rik, Cdx2, Speer4c, Gnrhr, Tmprss11b, Tmprss11e,
## Gm28434, Gm21698, Gm10220, Gm7361, Dthd1, Sds, Tmem270, Gal3st4, Drd5, Tcf23,
## Nobox, Smco2, Clec4f, Klrb1, Clec2e, Clec12b, Gm6619, Vmn1r15, Vmn1r21, Rnf133,
## Hyal4, Nat8f7, Vmn1r33, Figla, Olfr675, Sult2a1, Klk11, Klk5, Klk15, Cyp2f2,
## Klk1b24, Crx, Olfr694, Mill1, AU018091, Apoc2, Vmn1r55, Olfr520, Vmn2r54,
## Cuzd1, Cnfn, Tex36, Mrgprx1, 1700015G11Rik, Scnn1g, Muc5b, Vmn1r85, Lyz1,
## Gm7075, Anapc15-ps, Rfx6, Rdh1, Rdh16f1, Mettl7b, Prf1, Taar4, Gstt4, Casp14,
## Gm4767, 4932415D10Rik, 2310057J18Rik, Atp4b, Cldn24, BB014433, 4930467E23Rik,
## Gm31371, Defb2, Anxa10, Sgo2b, Slc38a8, Gm10663, Nkx6-3, Ces1c, Ces1e, Cd209c,
## Gm49320, Slc10a2, Olfr372, Cib3, Chrnb3, Gm10358, Gm10999, Trhr2, Defb48,
## Chrna2, Synb, Olfr727, Ang2, Gm21750, Gm49387, Fam170b, Npy4r, 9230112D13Rik,
## Mat1a, Sftpa1, Gm8126, Cma1, Olfr720, Gm2916, Ankk1, Pou2af1, Olfr912, Olfr967,
## Olfr976, Trim43b, Tbc1d21, Apoa5, Prss45, Fam240a, Olfr224, Krt12, Krt39,
## Krt31, Krt42, Smim23, BC053393, Havcr1, Olfr1396, Olfr10, Olfr1388, Ppy,
## Olfr1378, Tspan10, Rnf222, Il5, Spaca3, Tex19.1, Alox12b, Slfn4, Lypd8, Npc1l1,
## Ush1g, Slc16a5, H1f9, Olfr394, Vmn1r197, Naip1, Prl3b1, Prl5a1, Akr1c18,
## Zfp998, Olfr1535, Cox8c, Gphb5, Acot5, Cyp2d9, Tas2r119, Ly6m, Ly6g2, Odf1,
## Gm7489, Tex33, Krt72, Gm10337, Smgc, Spatc1, Krt86, Slc9c1, Dppa4, Tssk1,
## Itgb2l, A730009L09Rik, Krtap26-1, Fetub, Stfa2l1, Dcpp2, Btnl4, Cyp21a1,
## Ly6g5c, Cdsn, 2300002M23Rik, Gm8909, H2-T3, Tcp10b, Olfr107, Crisp3, Crisp1,
## Fpr2, Vmn2r92, Vmn1r224, Vmn1r229, Clpsl2, Gm17657, Glp1r, Tff2, Cyp4f37,
## 2410137M14Rik, Gm35060, Wnt8a, 1700034E13Rik, Gykl1, Hrh4, Lyzl1, Lipf,
## Cyp2c39, Slc22a30, Cblif, Olfr1419, Glyat, Cntf, Cabp2.
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
# Colour the UMAP by the last metadata column of EmbryoWt (presumably the
# classification column appended by doubletFinder() - confirm).
p1 <- DimPlot(
  EmbryoWt,
  reduction = "umap",
  group.by = colnames(EmbryoWt@meta.data)[ncol(EmbryoWt@meta.data)],
  cols = c("red", "grey"),
  pt.size = 0.3
)

# nFeature_RNA on the same embedding, for side-by-side comparison.
p2 <- FeaturePlot(EmbryoWt, features = "nFeature_RNA", reduction = "umap")


p1 + p2

# Cell counts per value of the last metadata column.
table(EmbryoWt@meta.data[ncol(EmbryoWt@meta.data)])
## DF.classifications_0.25_0.12_866
## Doublet Singlet 
##     866    8756
# It does not look like it is telling the doublets apart in most cases; the number of called doublets looks like an overestimate.
# Most doublets must have been taken out in the main Liver analysis by HTO selection

# Bar plot of the last metadata column of EmbryoWt, grouped by the
# resolution-0.35 clusters.
dittoBarPlot(
  object = EmbryoWt,
  var = colnames(EmbryoWt@meta.data)[ncol(EmbryoWt@meta.data)],
  group.by = "RNA_snn_res.0.35"
)

# Ptprc expression per cluster, with the legend hidden.
VlnPlot(EmbryoWt, features = "Ptprc", group.by = "RNA_snn_res.0.35",
        pt.size = 0.05) +
  theme(legend.position = "none")

# Keep only the EmbryoWt cells whose last metadata column equals "Singlet".
# The classification must be read from EmbryoWt itself: indexing with the
# EmbryoLOF metadata would select EmbryoWt cells by their position in a
# different object instead of by their own classification.
# NOTE(review): cell counts printed further down were rendered before this
# fix and will change on re-rendering.
EmbryoWt <- EmbryoWt[, which(EmbryoWt@meta.data[length(EmbryoWt@meta.data)] == "Singlet")]

0.7 Merging and Performing QC Again

# Combine both singlet-filtered objects into a single project.
Embryo <- merge(x = EmbryoLOF, y = EmbryoWt,
                project = "Embryo_N123LOF_Mosaic")

# Normalize data (LogNormalize, scale factor 10000)

Embryo <- NormalizeData(
  object = Embryo,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)
## Normalizing layer: counts.1.1
## Normalizing layer: counts.2.2
# Identification of highly variable features (feature selection)

# Select 2000 variable features with the "vst" method.
Embryo <- FindVariableFeatures(
  object = Embryo,
  selection.method = "vst",
  nfeatures = 2000
)
## Finding variable features for layer counts.1.1
## Finding variable features for layer counts.2.2
# First ten entries of the variable-feature list, printed for inspection.
top10 <- head(x = VariableFeatures(object = Embryo), n = 10)
top10
##  [1] "Actc1" "Mylpf" "Ttn"   "Myl1"  "Tnnc1" "Acta2" "Myog"  "Nefm"  "Tnni1"
## [10] "Myl4"
# One-column data frame (column `vars`) holding the variable feature names.
vars <- data.frame(vars = VariableFeatures(Embryo))

# Count the variable features that are NOT haemoglobin (Hb*, excluding Hbp*),
# mitochondrial (mt-*) or ribosomal (Rps*/Rpl*) genes. The patterns have to be
# matched against the `vars` column: matching them against the constant `1`
# never hits, so no row is ever removed and the count is always nrow(vars).
vars %>%
  filter(!grepl("^Hb[^(p)]", vars)) %>%
  filter(!grepl("^mt-", vars)) %>%
  filter(!grepl("^Rp[sl]", vars)) %>%
  nrow()
## [1] 2000
# A count of 2000 means there are no mitochondrial, ribosomal or
# hemoglobin-related genes among the top 2000 variable features.
# NOTE(review): the 2000 shown above was rendered before the filter was fixed
# (when it could not remove any row); re-render to confirm this conclusion.

# Variable-feature plot with the top10 genes labelled (non-overlapping labels).
plot1 <- VariableFeaturePlot(object = Embryo)
plot2 <- LabelPoints(points = top10, plot = plot1, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
# cairo_pdf("../Plots/QC/Variable_features_Singlets.pdf", height = 6, width = 8)
plot2
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 305 rows containing missing values or values outside the scale range
## (`geom_point()`).

# dev.off()

# Scaling the data: every gene in the object is scaled, not only the
# variable features.

all.genes <- rownames(x = Embryo)
Embryo <- ScaleData(object = Embryo, features = all.genes)
## Centering and scaling data matrix

0.8 Perform Linear Dimensional Reduction

## Perform linear dimensional reduction

# PCA computed on the variable features of the object.
Embryo <- RunPCA(object = Embryo, features = VariableFeatures(Embryo))
## PC_ 1 
## Positive:  Lgals1, Gpc3, Sparc, Col1a2, Fbn2, Col3a1, Tpm1, Hmga2, Col1a1, Fn1 
##     Anxa2, Fos, Mest, Twist1, Prrx1, Jun, Col5a2, Peg3, Ldha, Hmgb2 
##     Dlc1, Id1, Fbxl7, Hmcn1, Cped1, Cyba, Itm2a, Ptn, S100a10, Prrx2 
## Negative:  Rtn1, Tubb3, Stmn3, Elavl3, Dcx, Map2, Ina, Crmp1, Myt1l, Tagln3 
##     Scg5, Rims2, Stmn2, Akap6, Soga3, Kif5c, Nsg2, Elavl2, Rab3c, Cadps 
##     Elavl4, Rufy3, Scg3, Mapt, Mllt11, Pcsk1n, Celf4, Tubb2b, Igfbpl1, Dpysl5 
## PC_ 2 
## Positive:  Gpc3, Col1a2, Col3a1, Fbn2, Aff3, Rbms3, Col1a1, Gpc6, Ptprd, Ptn 
##     Adgrl3, Bnc2, Zfhx4, Fbxl7, Nfib, Tenm3, Pbx1, Ror1, Peg3, Col5a2 
##     Fn1, Robo1, Prrx1, Rora, Efna5, Vcan, Sema3a, Slit3, Aff2, Mmp16 
## Negative:  Gmfg, Lcp1, Smagp, Icam2, Nrros, Esam, Fxyd5, Inpp5d, Gimap6, S100a16 
##     Cldn5, Ptpn18, Gimap1, Cd93, Plvap, Cdh5, Tgfb1, Pecam1, Rasgrp3, Samsn1 
##     Myct1, Gatm, Kdr, Laptm5, Flt1, Stab1, Gngt2, Plxnd1, Lmo2, Ecscr 
## PC_ 3 
## Positive:  Hmgb2, Top2a, H2az1, Cdca8, Nusap1, Mki67, Kif11, Pclaf, Birc5, H2ax 
##     Prc1, Cenpf, Tpx2, Spc25, Aurkb, H1f5, Smc4, Cks2, Ube2c, H3c3 
##     Ckap2l, Cenpe, Cdk1, H2ac24, H2ac8, Cdca3, Incenp, H4c4, Mis18bp1, H2bc18 
## Negative:  Actc1, Ttn, Myog, Tnnt2, Tnni1, Klhl41, Myl1, Neb, Trim55, Mymk 
##     Mylpf, Des, Atp2a1, Tnnc1, Tnnt1, Myh3, Myod1, Cryab, Myl4, Mylk4 
##     Chrna1, Synpo2l, Ablim3, Smyd1, Il17b, Hspb2, Unc45b, Actn2, Fitm1, Apobec2 
## PC_ 4 
## Positive:  Cdh5, Icam2, Esam, Kdr, Cldn5, Gimap6, Pecam1, Flt1, Cd93, Emcn 
##     Ecscr, Plvap, Rasgrp3, Plxnd1, Gmfg, Myct1, Smagp, Lcp1, Gimap1, Egfl7 
##     Gngt2, Adgrf5, Fxyd5, Eng, Tie1, Cd34, Rasip1, Ctla2a, Adgrl4, Ramp2 
## Negative:  Top2a, Nusap1, Cdca8, Prc1, Cenpf, Kif11, Spc25, H3c3, Hmgb2, Ube2c 
##     Mki67, Tpx2, H1f5, H2ac4, Pclaf, Cks2, H2ac8, H2az1, Cenpe, Ckap2l 
##     Aurkb, Knl1, Mis18bp1, Smc4, H2ac24, Birc5, H2bc18, Fbxo5, Hmmr, Cdca3 
## PC_ 5 
## Positive:  Rac2, Tyrobp, Spi1, Fcer1g, Lst1, Psmb8, Ptprc, Ptpn6, Coro1a, Cd53 
##     Laptm5, Fcgr3, Ms4a6c, Dock2, Plek, Cd52, Arhgap30, Fyb, Csf1r, Ly86 
##     Cyth4, Gpr65, Cd86, Ctsc, Ncf2, Ctss, Bin2, C1qb, Aif1, Ms4a6b 
## Negative:  Cdh5, Esam, Cldn5, Plvap, Kdr, Emcn, Ecscr, Flt1, Icam2, Pecam1 
##     Cd93, Myct1, S100a16, Tie1, Adgrf5, Rasip1, Ptprb, Gimap6, Gimap4, Klhl4 
##     Eng, Plxnd1, Aplnr, Apold1, Sox17, Fam167b, Adgrl4, Ramp2, Cyyr1, Sox18
# Examine and visualize PCA results a few different ways

# Five top features per direction for each of the first five PCs.
print(x = Embryo[["pca"]], dims = seq_len(5), nfeatures = 5)
## PC_ 1 
## Positive:  Lgals1, Gpc3, Sparc, Col1a2, Fbn2 
## Negative:  Rtn1, Tubb3, Stmn3, Elavl3, Dcx 
## PC_ 2 
## Positive:  Gpc3, Col1a2, Col3a1, Fbn2, Aff3 
## Negative:  Gmfg, Lcp1, Smagp, Icam2, Nrros 
## PC_ 3 
## Positive:  Hmgb2, Top2a, H2az1, Cdca8, Nusap1 
## Negative:  Actc1, Ttn, Myog, Tnnt2, Tnni1 
## PC_ 4 
## Positive:  Cdh5, Icam2, Esam, Kdr, Cldn5 
## Negative:  Top2a, Nusap1, Cdca8, Prc1, Cenpf 
## PC_ 5 
## Positive:  Rac2, Tyrobp, Spi1, Fcer1g, Lst1 
## Negative:  Cdh5, Esam, Cldn5, Plvap, Kdr
# Feature loadings for the first two PCs.
VizDimLoadings(object = Embryo, dims = 1:2, reduction = "pca")

# Cells in PCA space, coloured by original identity.
DimPlot(object = Embryo, reduction = "pca", group.by = "orig.ident")

# Heatmaps for PCs 1-9 using 500 cells each.
DimHeatmap(object = Embryo, dims = 1:9, cells = 500, balanced = TRUE)

# Determine the 'dimensionality' of the dataset

# Embryo <- JackStraw(Embryo, num.replicate = 100, dims = 30)
# Embryo <- ScoreJackStraw(Embryo, dims = 1:30)
# JackStrawPlot(Embryo, dims = 1:30)

# cairo_pdf("../Plots/QC/ElbowPlot_Singlets.pdf", height = 6, width = 6)
ElbowPlot(object = Embryo, ndims = 30)

# dev.off()

Based on the ElbowPlot, 20 dimensions still looks like a good number of principal components (PCs) to use.

# Cluster the cells

# Neighbour graph built on the first 20 PCs.
Embryo <- FindNeighbors(Embryo, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
# Cluster at three resolutions; the 0.35 result is the one plotted later
# (metadata column "RNA_snn_res.0.35"). TRUE/FALSE are spelled out because
# T/F are ordinary variables that can be reassigned.
Embryo <- FindClusters(Embryo, resolution = 0.1, verbose = FALSE)
Embryo <- FindClusters(Embryo, resolution = 0.2, verbose = FALSE)
Embryo <- FindClusters(Embryo, resolution = 0.35, verbose = FALSE)


# Run UMAP

# UMAP on the same 20 PCs used for the neighbour graph. FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
Embryo <- RunUMAP(Embryo, dims = 1:20, verbose = FALSE)
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
## Found more than one class "dist" in cache; using the first, from namespace 'spam'
## Also defined by 'BiocGenerics'
# cairo_pdf("../Plots/QC/UMAP_res0.35_Singlets.pdf", height = 6, width = 6)
DimPlot(object = Embryo, group.by = "RNA_snn_res.0.35")

# dev.off()

# cairo_pdf("../Plots/QC/UMAP_res0.35.split.Singlets.pdf", height = 6, width = 6)
DimPlot(object = Embryo, split.by = "Condition", group.by = "RNA_snn_res.0.35")

# dev.off()

# Make the Condition column the active identity, then count cells per identity.
Idents(object = Embryo) <- "Condition"

table(Idents(object = Embryo))
## 
## N123LOF  N123WT 
##    9264    9264
# saveRDS(Embryo, "../rds/EmbryoN123.Singlets.rds")

1 Perform Label Transfer

I will perform label transfer on the dataset containing only singlets.

# For documentation, run `?celldex::MouseRNAseqData` in an interactive session.
# The help lookup is not executed here: it has no effect on the analysis.

# Reference dataset returned by celldex::MouseRNAseqData().
mmdb <- celldex::MouseRNAseqData()
## snapshotDate(): 2022-10-31
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
## see ?celldex and browseVignettes('celldex') for documentation
## loading from cache
# Singlets
# Embryo <- readRDS("../rds/EmbryoN123.Singlets.rds")

# Doublets
# Embryo <- readRDS("../rds/EmbryoN123.withDoublets.rds")

# Join the layers of the merged object, then convert its RNA assay to a
# SingleCellExperiment for SingleR.
data.filt <- JoinLayers(object = Embryo)
sce <- as.SingleCellExperiment(x = data.filt, assay = "RNA")



# Predict a label for every cell using the main labels of the reference.
pred.Embryo.mm <- SingleR(
  test = sce,
  ref = mmdb,
  labels = mmdb$label.main,
  assay.type.test = 1
)
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
# Store the SingleR labels in the metadata. The assignment is positional, so
# it relies on the prediction rows being in the same order as the cells.
Embryo@meta.data$mmdb <- pred.Embryo.mm$labels

# Embryo@meta.data$hpca <- pred.Embryo.hs$labels

# Embryo@meta.data$bpen <- pred.Embryo.bpen$labels

# UMAP coloured by transferred label, one panel per condition, no legend.
p25 <- DimPlot(Embryo, pt.size = 1.2, group.by = "mmdb", reduction = "umap",
               split.by = "Condition") +
  NoLegend() +
  theme(strip.text.x = element_text(size = 24), plot.title = element_blank()) +
  xlab("UMAP_1") +
  ylab("UMAP_2")

# Same embedding for all cells together, with axes stripped and a title.
p26a <- DimPlot(Embryo, pt.size = 1.2, group.by = "mmdb", reduction = "umap") +
  theme(legend.text = element_text(size = 14),
        axis.title.x = element_blank(),
        axis.line.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 24)) +
  ggtitle("Mouse Database")
# Boxed, repelled labels for the listed cell types only; TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
p26 <- LabelClusters(p26a, id = "mmdb", box = TRUE, size = 3,
                     clusters = c("Cardiomyocytes", "Endothelial cells", "Epithelial cells", "Erythrocytes", "Fibroblasts", "Hepatocytes", "Macrophages", "Monocytes", "Neurons"),
                     repel = TRUE, max.overlaps = 30) +
  theme(legend.position = "none")

# Attach scales (used for hue_pal()). library() stops with an error if the
# package is missing, whereas require() only returns FALSE and lets the
# script fail later.
library(scales)
## Loading required package: scales
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Convert the transferred labels to a factor.
Embryo@meta.data$mmdb <- as.factor(Embryo@meta.data$mmdb)

# One hue_pal() colour per label level.
identities <- levels(Embryo@meta.data$mmdb)
my_color_palette <- hue_pal()(length(identities))

# Bar plot of the labels, grouped by condition, using that palette.
p26b <- dittoBarPlot(
  Embryo,
  var = "mmdb",
  group.by = "Condition",
  color.panel = my_color_palette
)

p27 <- list(p25, p26, p26b)

# Single-row layout: columns 1-4, 5-6 and 7 for the three panels.
design <- c(
  patchwork::area(t = 1, l = 1, b = 1, r = 4),
  patchwork::area(t = 1, l = 5, b = 1, r = 6),
  patchwork::area(t = 1, l = 7, b = 1, r = 7)
)

# Add the panels left to right, then apply the layout.
p28 <- (p27[[1]] + p27[[2]] + p27[[3]]) +
  patchwork::plot_layout(design = design)



# cairo_pdf("../Plots/QC/UMAP_LabelTransfer_Doublets.pdf",  width = 25, height = 21, family = "Arial")
# cairo_pdf("../Plots/QC/UMAP_LabelTransfer.pdf",  width = 25, height = 21, family = "Arial")
p28

# dev.off()

# Number of cells per condition.
table(Embryo$Condition)
## 
## N123LOF  N123WT 
##    9264    9264
# saveRDS(Embryo, "../rds/EmbryoN123.Singlets.rds")
# saveRDS(Embryo, "../rds/EmbryoN123.Doublets.rds")

2 R Session

# Print the R version, platform and package versions used for this run.
utils::sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] scales_1.3.0                ROCR_1.0-11                
##  [3] KernSmooth_2.23-20          fields_16.2                
##  [5] viridisLite_0.4.2           spam_2.10-0                
##  [7] ggrepel_0.9.5               lubridate_1.9.3            
##  [9] forcats_1.0.0               stringr_1.5.1              
## [11] purrr_1.0.2                 readr_2.1.5                
## [13] tidyr_1.3.1                 tibble_3.2.1               
## [15] tidyverse_2.0.0             reshape2_1.4.4             
## [17] escape_1.8.0                scRNAseq_2.12.0            
## [19] SingleCellExperiment_1.20.1 SingleR_2.0.0              
## [21] celldex_1.8.0               SummarizedExperiment_1.28.0
## [23] Biobase_2.58.0              GenomicRanges_1.50.2       
## [25] GenomeInfoDb_1.34.9         IRanges_2.32.0             
## [27] S4Vectors_0.36.2            BiocGenerics_0.44.0        
## [29] MatrixGenerics_1.10.0       matrixStats_1.1.0          
## [31] dplyr_1.1.4                 patchwork_1.2.0            
## [33] dittoSeq_1.10.0             DoubletFinder_2.0.4        
## [35] ggplot2_3.5.0               Seurat_5.0.3               
## [37] SeuratObject_5.0.1          sp_2.1-3                   
## [39] BiocStyle_2.26.0           
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3                rtracklayer_1.58.0           
##   [3] scattermore_1.2               bit64_4.0.5                  
##   [5] knitr_1.45                    irlba_2.3.5.1                
##   [7] DelayedArray_0.24.0           data.table_1.15.4            
##   [9] KEGGREST_1.38.0               RCurl_1.98-1.14              
##  [11] AnnotationFilter_1.22.0       generics_0.1.3               
##  [13] GenomicFeatures_1.50.4        ScaledMatrix_1.6.0           
##  [15] cowplot_1.1.3                 RSQLite_2.3.6                
##  [17] RANN_2.6.1                    future_1.33.2                
##  [19] tzdb_0.4.0                    bit_4.0.5                    
##  [21] spatstat.data_3.0-4           xml2_1.3.6                   
##  [23] httpuv_1.6.15                 xfun_0.43                    
##  [25] hms_1.1.3                     jquerylib_0.1.4              
##  [27] babelgene_22.9                evaluate_0.23                
##  [29] promises_1.2.1                fansi_1.0.6                  
##  [31] restfulr_0.0.15               progress_1.2.3               
##  [33] dbplyr_2.3.4                  igraph_2.0.3                 
##  [35] DBI_1.2.2                     htmlwidgets_1.6.4            
##  [37] spatstat.geom_3.2-9           RSpectra_0.16-1              
##  [39] backports_1.4.1               bookdown_0.38                
##  [41] annotate_1.76.0               biomaRt_2.54.1               
##  [43] deldir_2.0-4                  sparseMatrixStats_1.10.0     
##  [45] vctrs_0.6.5                   ensembldb_2.22.0             
##  [47] abind_1.4-5                   cachem_1.0.8                 
##  [49] withr_3.0.0                   progressr_0.14.0             
##  [51] sctransform_0.4.1             GenomicAlignments_1.34.1     
##  [53] prettyunits_1.2.0             goftest_1.2-3                
##  [55] cluster_2.1.4                 ExperimentHub_2.6.0          
##  [57] dotCall64_1.1-1               lazyeval_0.2.2               
##  [59] crayon_1.5.2                  spatstat.explore_3.2-7       
##  [61] labeling_0.4.3                pkgconfig_2.0.3              
##  [63] nlme_3.1-159                  ProtGenerics_1.30.0          
##  [65] rlang_1.1.3                   globals_0.16.3               
##  [67] lifecycle_1.0.4               miniUI_0.1.1.1               
##  [69] filelock_1.0.3                fastDummies_1.7.3            
##  [71] BiocFileCache_2.6.1           rsvd_1.0.5                   
##  [73] AnnotationHub_3.6.0           polyclip_1.10-6              
##  [75] RcppHNSW_0.6.0                GSVA_1.46.0                  
##  [77] lmtest_0.9-40                 graph_1.76.0                 
##  [79] Matrix_1.6-5                  Rhdf5lib_1.20.0              
##  [81] zoo_1.8-12                    ggridges_0.5.6               
##  [83] pheatmap_1.0.12               png_0.1-8                    
##  [85] rjson_0.2.21                  bitops_1.0-8                 
##  [87] rhdf5filters_1.10.1           Biostrings_2.66.0            
##  [89] blob_1.2.4                    DelayedMatrixStats_1.20.0    
##  [91] parallelly_1.37.1             spatstat.random_3.2-3        
##  [93] beachmat_2.14.2               memoise_2.0.1                
##  [95] GSEABase_1.60.0               magrittr_2.0.3               
##  [97] plyr_1.8.9                    ica_1.0-3                    
##  [99] zlibbioc_1.44.0               compiler_4.2.1               
## [101] BiocIO_1.8.0                  RColorBrewer_1.1-3           
## [103] fitdistrplus_1.1-11           Rsamtools_2.14.0             
## [105] cli_3.6.2                     XVector_0.38.0               
## [107] listenv_0.9.1                 pbapply_1.7-2                
## [109] MASS_7.3-58                   tidyselect_1.2.1             
## [111] stringi_1.8.3                 highr_0.10                   
## [113] yaml_2.3.8                    BiocSingular_1.14.0          
## [115] grid_4.2.1                    sass_0.4.9                   
## [117] timechange_0.3.0              tools_4.2.1                  
## [119] future.apply_1.11.2           rstudioapi_0.16.0            
## [121] gridExtra_2.3                 farver_2.1.1                 
## [123] Rtsne_0.17                    digest_0.6.35                
## [125] BiocManager_1.30.22           shiny_1.8.1.1                
## [127] Rcpp_1.0.13                   broom_1.0.5                  
## [129] BiocVersion_3.16.0            later_1.3.2                  
## [131] RcppAnnoy_0.0.22              httr_1.4.7                   
## [133] AnnotationDbi_1.60.2          colorspace_2.1-0             
## [135] XML_3.99-0.16.1               tensor_1.5                   
## [137] reticulate_1.35.0             splines_4.2.1                
## [139] uwot_0.1.16                   spatstat.utils_3.0-4         
## [141] plotly_4.10.4                 xtable_1.8-4                 
## [143] jsonlite_1.8.8                R6_2.5.1                     
## [145] pillar_1.9.0                  htmltools_0.5.8.1            
## [147] mime_0.12                     glue_1.7.0                   
## [149] fastmap_1.1.1                 BiocParallel_1.32.6          
## [151] BiocNeighbors_1.16.0          interactiveDisplayBase_1.36.0
## [153] codetools_0.2-18              maps_3.4.2                   
## [155] utf8_1.2.4                    lattice_0.20-45              
## [157] bslib_0.7.0                   spatstat.sparse_3.0-3        
## [159] curl_5.2.1                    leiden_0.4.3.1               
## [161] magick_2.8.3                  survival_3.4-0               
## [163] rmarkdown_2.26                munsell_0.5.1                
## [165] rhdf5_2.42.1                  GenomeInfoDbData_1.2.9       
## [167] UCell_2.2.0                   HDF5Array_1.26.0             
## [169] msigdbr_7.5.1                 gtable_0.3.4